diff options
Diffstat (limited to 'net')
161 files changed, 4805 insertions, 3523 deletions
diff --git a/net/802/garp.c b/net/802/garp.c index 8456f5d98b85..5d9630a0eb93 100644 --- a/net/802/garp.c +++ b/net/802/garp.c @@ -609,8 +609,12 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl /* Delete timer and generate a final TRANSMIT_PDU event to flush out * all pending messages before the applicant is gone. */ del_timer_sync(&app->join_timer); + + spin_lock_bh(&app->lock); garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU); garp_pdu_queue(app); + spin_unlock_bh(&app->lock); + garp_queue_xmit(app); dev_mc_del(dev, appl->proto.group_address); diff --git a/net/atm/common.c b/net/atm/common.c index 7b491006eaf4..737bef59ce89 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -531,6 +531,8 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, struct sk_buff *skb; int copied, error = -EINVAL; + msg->msg_namelen = 0; + if (sock->state != SS_CONNECTED) return -ENOTCONN; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 7b11f8bc5071..e277e38f736b 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1642,6 +1642,7 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock, ax25_address src; const unsigned char *mac = skb_mac_header(skb); + memset(sax, 0, sizeof(struct full_sockaddr_ax25)); ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL, &digi, NULL, NULL); sax->sax25_family = AF_AX25; diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index eb0f4b16ff09..17f33a62f6db 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -397,13 +397,12 @@ static int a2mp_getampassoc_rsp(struct amp_mgr *mgr, struct sk_buff *skb, if (ctrl) { u8 *assoc; - assoc = kzalloc(assoc_len, GFP_KERNEL); + assoc = kmemdup(rsp->amp_assoc, assoc_len, GFP_KERNEL); if (!assoc) { amp_ctrl_put(ctrl); return -ENOMEM; } - memcpy(assoc, rsp->amp_assoc, assoc_len); ctrl->assoc = assoc; ctrl->assoc_len = assoc_len; ctrl->assoc_rem_len = assoc_len; @@ -472,13 +471,12 @@ static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb, size_t assoc_len = le16_to_cpu(hdr->len) - sizeof(*req); u8 *assoc; - assoc = kzalloc(assoc_len, GFP_KERNEL); + assoc = kmemdup(req->amp_assoc, assoc_len, GFP_KERNEL); if (!assoc) { amp_ctrl_put(ctrl); return -ENOMEM; } - memcpy(assoc, req->amp_assoc, assoc_len); ctrl->assoc = assoc; ctrl->assoc_len = assoc_len; ctrl->assoc_rem_len = assoc_len; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index d3ee69b35a78..e5338f787d68 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -92,23 +92,14 @@ int bt_sock_register(int proto, const struct net_proto_family *ops) } EXPORT_SYMBOL(bt_sock_register); -int bt_sock_unregister(int proto) +void bt_sock_unregister(int proto) { - int err = 0; - if (proto < 0 || proto >= BT_MAX_PROTO) - return -EINVAL; + return; write_lock(&bt_proto_lock); - - if (!bt_proto[proto]) - err = -ENOENT; - else - bt_proto[proto] = NULL; - + bt_proto[proto] = NULL; write_unlock(&bt_proto_lock); - - return err; } EXPORT_SYMBOL(bt_sock_unregister); @@ -230,6 +221,8 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags & (MSG_OOB)) return -EOPNOTSUPP; + msg->msg_namelen = 0; + skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) @@ -237,8 +230,6 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return err; } - msg->msg_namelen = 0; - copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; @@ -422,7 +413,8 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock, return bt_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index e7154a58465f..5b1c04e28821 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -253,8 +253,6 @@ error: void __exit bnep_sock_cleanup(void) { bt_procfs_cleanup(&init_net, "bnep"); - if (bt_sock_unregister(BTPROTO_BNEP) < 0) - BT_ERR("Can't unregister BNEP socket"); - + bt_sock_unregister(BTPROTO_BNEP); proto_unregister(&bnep_proto); } diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index 1c57482112b6..58d9edebab4b 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -264,8 +264,6 @@ error: void cmtp_cleanup_sockets(void) { bt_procfs_cleanup(&init_net, "cmtp"); - if (bt_sock_unregister(BTPROTO_CMTP) < 0) - BT_ERR("Can't unregister CMTP socket"); - + bt_sock_unregister(BTPROTO_CMTP); proto_unregister(&cmtp_proto); } diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 4925a02ae7e4..b9f90169940b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -117,7 +117,7 @@ static void hci_acl_create_connection_cancel(struct hci_conn *conn) hci_send_cmd(conn->hdev, HCI_OP_CREATE_CONN_CANCEL, sizeof(cp), &cp); } -void hci_acl_disconn(struct hci_conn *conn, __u8 reason) +void hci_disconnect(struct hci_conn *conn, __u8 reason) { struct hci_cp_disconnect cp; @@ -253,7 +253,7 @@ static void hci_conn_disconnect(struct hci_conn *conn) hci_amp_disconn(conn, reason); break; default: - hci_acl_disconn(conn, reason); + hci_disconnect(conn, reason); break; } } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 60793e7b768b..cfcad5423f1c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -57,36 +57,9 @@ static void hci_notify(struct hci_dev *hdev, int event) /* ---- HCI requests ---- */ -void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result) +static void hci_req_sync_complete(struct hci_dev *hdev, u8 result) { - BT_DBG("%s command 0x%4.4x result 0x%2.2x", hdev->name, cmd, result); - - /* If this is the init phase check if the completed command matches - * the last init command, and if not just return. - */ - if (test_bit(HCI_INIT, &hdev->flags) && hdev->init_last_cmd != cmd) { - struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data; - u16 opcode = __le16_to_cpu(sent->opcode); - struct sk_buff *skb; - - /* Some CSR based controllers generate a spontaneous - * reset complete event during init and any pending - * command will never be completed. In such a case we - * need to resend whatever was the last sent - * command. - */ - - if (cmd != HCI_OP_RESET || opcode == HCI_OP_RESET) - return; - - skb = skb_clone(hdev->sent_cmd, GFP_ATOMIC); - if (skb) { - skb_queue_head(&hdev->cmd_q, skb); - queue_work(hdev->workqueue, &hdev->cmd_work); - } - - return; - } + BT_DBG("%s result 0x%2.2x", hdev->name, result); if (hdev->req_status == HCI_REQ_PEND) { hdev->req_result = result; @@ -107,21 +80,41 @@ static void hci_req_cancel(struct hci_dev *hdev, int err) } /* Execute request and wait for completion. */ -static int __hci_request(struct hci_dev *hdev, - void (*req)(struct hci_dev *hdev, unsigned long opt), - unsigned long opt, __u32 timeout) +static int __hci_req_sync(struct hci_dev *hdev, + void (*func)(struct hci_request *req, + unsigned long opt), + unsigned long opt, __u32 timeout) { + struct hci_request req; DECLARE_WAITQUEUE(wait, current); int err = 0; BT_DBG("%s start", hdev->name); + hci_req_init(&req, hdev); + hdev->req_status = HCI_REQ_PEND; + func(&req, opt); + + err = hci_req_run(&req, hci_req_sync_complete); + if (err < 0) { + hdev->req_status = 0; + + /* ENODATA means the HCI request command queue is empty. + * This can happen when a request with conditionals doesn't + * trigger any commands to be sent. This is normal behavior + * and should not trigger an error return. + */ + if (err == -ENODATA) + return 0; + + return err; + } + add_wait_queue(&hdev->req_wait_q, &wait); set_current_state(TASK_INTERRUPTIBLE); - req(hdev, opt); schedule_timeout(timeout); remove_wait_queue(&hdev->req_wait_q, &wait); @@ -150,9 +143,10 @@ static int __hci_request(struct hci_dev *hdev, return err; } -static int hci_request(struct hci_dev *hdev, - void (*req)(struct hci_dev *hdev, unsigned long opt), - unsigned long opt, __u32 timeout) +static int hci_req_sync(struct hci_dev *hdev, + void (*req)(struct hci_request *req, + unsigned long opt), + unsigned long opt, __u32 timeout) { int ret; @@ -161,75 +155,86 @@ static int hci_request(struct hci_dev *hdev, /* Serialize all requests */ hci_req_lock(hdev); - ret = __hci_request(hdev, req, opt, timeout); + ret = __hci_req_sync(hdev, req, opt, timeout); hci_req_unlock(hdev); return ret; } -static void hci_reset_req(struct hci_dev *hdev, unsigned long opt) +static void hci_reset_req(struct hci_request *req, unsigned long opt) { - BT_DBG("%s %ld", hdev->name, opt); + BT_DBG("%s %ld", req->hdev->name, opt); /* Reset device */ - set_bit(HCI_RESET, &hdev->flags); - hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); + set_bit(HCI_RESET, &req->hdev->flags); + hci_req_add(req, HCI_OP_RESET, 0, NULL); } -static void bredr_init(struct hci_dev *hdev) +static void bredr_init(struct hci_request *req) { - hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED; + req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED; /* Read Local Supported Features */ - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); + hci_req_add(req, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); /* Read Local Version */ - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL); + hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL); + + /* Read BD Address */ + hci_req_add(req, HCI_OP_READ_BD_ADDR, 0, NULL); } -static void amp_init(struct hci_dev *hdev) +static void amp_init(struct hci_request *req) { - hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED; + req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED; /* Read Local Version */ - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL); + hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL); /* Read Local AMP Info */ - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); + hci_req_add(req, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); /* Read Data Blk size */ - hci_send_cmd(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL); + hci_req_add(req, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL); } -static void hci_init_req(struct hci_dev *hdev, unsigned long opt) +static void hci_init1_req(struct hci_request *req, unsigned long opt) { + struct hci_dev *hdev = req->hdev; + struct hci_request init_req; struct sk_buff *skb; BT_DBG("%s %ld", hdev->name, opt); /* Driver initialization */ + hci_req_init(&init_req, hdev); + /* Special commands */ while ((skb = skb_dequeue(&hdev->driver_init))) { bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; skb->dev = (void *) hdev; - skb_queue_tail(&hdev->cmd_q, skb); - queue_work(hdev->workqueue, &hdev->cmd_work); + if (skb_queue_empty(&init_req.cmd_q)) + bt_cb(skb)->req.start = true; + + skb_queue_tail(&init_req.cmd_q, skb); } skb_queue_purge(&hdev->driver_init); + hci_req_run(&init_req, NULL); + /* Reset */ if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) - hci_reset_req(hdev, 0); + hci_reset_req(req, 0); switch (hdev->dev_type) { case HCI_BREDR: - bredr_init(hdev); + bredr_init(req); break; case HCI_AMP: - amp_init(hdev); + amp_init(req); break; default: @@ -238,44 +243,327 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) } } -static void hci_scan_req(struct hci_dev *hdev, unsigned long opt) +static void bredr_setup(struct hci_request *req) +{ + struct hci_cp_delete_stored_link_key cp; + __le16 param; + __u8 flt_type; + + /* Read Buffer Size (ACL mtu, max pkt, etc.) */ + hci_req_add(req, HCI_OP_READ_BUFFER_SIZE, 0, NULL); + + /* Read Class of Device */ + hci_req_add(req, HCI_OP_READ_CLASS_OF_DEV, 0, NULL); + + /* Read Local Name */ + hci_req_add(req, HCI_OP_READ_LOCAL_NAME, 0, NULL); + + /* Read Voice Setting */ + hci_req_add(req, HCI_OP_READ_VOICE_SETTING, 0, NULL); + + /* Clear Event Filters */ + flt_type = HCI_FLT_CLEAR_ALL; + hci_req_add(req, HCI_OP_SET_EVENT_FLT, 1, &flt_type); + + /* Connection accept timeout ~20 secs */ + param = __constant_cpu_to_le16(0x7d00); + hci_req_add(req, HCI_OP_WRITE_CA_TIMEOUT, 2, ¶m); + + bacpy(&cp.bdaddr, BDADDR_ANY); + cp.delete_all = 0x01; + hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp); + + /* Read page scan parameters */ + if (req->hdev->hci_ver > BLUETOOTH_VER_1_1) { + hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL); + hci_req_add(req, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL); + } +} + +static void le_setup(struct hci_request *req) +{ + /* Read LE Buffer Size */ + hci_req_add(req, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL); + + /* Read LE Local Supported Features */ + hci_req_add(req, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL); + + /* Read LE Advertising Channel TX Power */ + hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); + + /* Read LE White List Size */ + hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL); + + /* Read LE Supported States */ + hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL); +} + +static u8 hci_get_inquiry_mode(struct hci_dev *hdev) +{ + if (lmp_ext_inq_capable(hdev)) + return 0x02; + + if (lmp_inq_rssi_capable(hdev)) + return 0x01; + + if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 && + hdev->lmp_subver == 0x0757) + return 0x01; + + if (hdev->manufacturer == 15) { + if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963) + return 0x01; + if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963) + return 0x01; + if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965) + return 0x01; + } + + if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 && + hdev->lmp_subver == 0x1805) + return 0x01; + + return 0x00; +} + +static void hci_setup_inquiry_mode(struct hci_request *req) +{ + u8 mode; + + mode = hci_get_inquiry_mode(req->hdev); + + hci_req_add(req, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); +} + +static void hci_setup_event_mask(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + + /* The second byte is 0xff instead of 0x9f (two reserved bits + * disabled) since a Broadcom 1.2 dongle doesn't respond to the + * command otherwise. + */ + u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 }; + + /* CSR 1.1 dongles does not accept any bitfield so don't try to set + * any event mask for pre 1.2 devices. + */ + if (hdev->hci_ver < BLUETOOTH_VER_1_2) + return; + + if (lmp_bredr_capable(hdev)) { + events[4] |= 0x01; /* Flow Specification Complete */ + events[4] |= 0x02; /* Inquiry Result with RSSI */ + events[4] |= 0x04; /* Read Remote Extended Features Complete */ + events[5] |= 0x08; /* Synchronous Connection Complete */ + events[5] |= 0x10; /* Synchronous Connection Changed */ + } + + if (lmp_inq_rssi_capable(hdev)) + events[4] |= 0x02; /* Inquiry Result with RSSI */ + + if (lmp_sniffsubr_capable(hdev)) + events[5] |= 0x20; /* Sniff Subrating */ + + if (lmp_pause_enc_capable(hdev)) + events[5] |= 0x80; /* Encryption Key Refresh Complete */ + + if (lmp_ext_inq_capable(hdev)) + events[5] |= 0x40; /* Extended Inquiry Result */ + + if (lmp_no_flush_capable(hdev)) + events[7] |= 0x01; /* Enhanced Flush Complete */ + + if (lmp_lsto_capable(hdev)) + events[6] |= 0x80; /* Link Supervision Timeout Changed */ + + if (lmp_ssp_capable(hdev)) { + events[6] |= 0x01; /* IO Capability Request */ + events[6] |= 0x02; /* IO Capability Response */ + events[6] |= 0x04; /* User Confirmation Request */ + events[6] |= 0x08; /* User Passkey Request */ + events[6] |= 0x10; /* Remote OOB Data Request */ + events[6] |= 0x20; /* Simple Pairing Complete */ + events[7] |= 0x04; /* User Passkey Notification */ + events[7] |= 0x08; /* Keypress Notification */ + events[7] |= 0x10; /* Remote Host Supported + * Features Notification + */ + } + + if (lmp_le_capable(hdev)) + events[7] |= 0x20; /* LE Meta-Event */ + + hci_req_add(req, HCI_OP_SET_EVENT_MASK, sizeof(events), events); + + if (lmp_le_capable(hdev)) { + memset(events, 0, sizeof(events)); + events[0] = 0x1f; + hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK, + sizeof(events), events); + } +} + +static void hci_init2_req(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + + if (lmp_bredr_capable(hdev)) + bredr_setup(req); + + if (lmp_le_capable(hdev)) + le_setup(req); + + hci_setup_event_mask(req); + + if (hdev->hci_ver > BLUETOOTH_VER_1_1) + hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); + + if (lmp_ssp_capable(hdev)) { + if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { + u8 mode = 0x01; + hci_req_add(req, HCI_OP_WRITE_SSP_MODE, + sizeof(mode), &mode); + } else { + struct hci_cp_write_eir cp; + + memset(hdev->eir, 0, sizeof(hdev->eir)); + memset(&cp, 0, sizeof(cp)); + + hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp); + } + } + + if (lmp_inq_rssi_capable(hdev)) + hci_setup_inquiry_mode(req); + + if (lmp_inq_tx_pwr_capable(hdev)) + hci_req_add(req, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL); + + if (lmp_ext_feat_capable(hdev)) { + struct hci_cp_read_local_ext_features cp; + + cp.page = 0x01; + hci_req_add(req, HCI_OP_READ_LOCAL_EXT_FEATURES, + sizeof(cp), &cp); + } + + if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) { + u8 enable = 1; + hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable), + &enable); + } +} + +static void hci_setup_link_policy(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_write_def_link_policy cp; + u16 link_policy = 0; + + if (lmp_rswitch_capable(hdev)) + link_policy |= HCI_LP_RSWITCH; + if (lmp_hold_capable(hdev)) + link_policy |= HCI_LP_HOLD; + if (lmp_sniff_capable(hdev)) + link_policy |= HCI_LP_SNIFF; + if (lmp_park_capable(hdev)) + link_policy |= HCI_LP_PARK; + + cp.policy = cpu_to_le16(link_policy); + hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp); +} + +static void hci_set_le_support(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_write_le_host_supported cp; + + memset(&cp, 0, sizeof(cp)); + + if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { + cp.le = 0x01; + cp.simul = lmp_le_br_capable(hdev); + } + + if (cp.le != lmp_host_le_capable(hdev)) + hci_req_add(req, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp), + &cp); +} + +static void hci_init3_req(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + + if (hdev->commands[5] & 0x10) + hci_setup_link_policy(req); + + if (lmp_le_capable(hdev)) { + hci_set_le_support(req); + hci_update_ad(req); + } +} + +static int __hci_init(struct hci_dev *hdev) +{ + int err; + + err = __hci_req_sync(hdev, hci_init1_req, 0, HCI_INIT_TIMEOUT); + if (err < 0) + return err; + + /* HCI_BREDR covers both single-mode LE, BR/EDR and dual-mode + * BR/EDR/LE type controllers. AMP controllers only need the + * first stage init. + */ + if (hdev->dev_type != HCI_BREDR) + return 0; + + err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT); + if (err < 0) + return err; + + return __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT); +} + +static void hci_scan_req(struct hci_request *req, unsigned long opt) { __u8 scan = opt; - BT_DBG("%s %x", hdev->name, scan); + BT_DBG("%s %x", req->hdev->name, scan); /* Inquiry and Page scans */ - hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } -static void hci_auth_req(struct hci_dev *hdev, unsigned long opt) +static void hci_auth_req(struct hci_request *req, unsigned long opt) { __u8 auth = opt; - BT_DBG("%s %x", hdev->name, auth); + BT_DBG("%s %x", req->hdev->name, auth); /* Authentication */ - hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth); + hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth); } -static void hci_encrypt_req(struct hci_dev *hdev, unsigned long opt) +static void hci_encrypt_req(struct hci_request *req, unsigned long opt) { __u8 encrypt = opt; - BT_DBG("%s %x", hdev->name, encrypt); + BT_DBG("%s %x", req->hdev->name, encrypt); /* Encryption */ - hci_send_cmd(hdev, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt); + hci_req_add(req, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt); } -static void hci_linkpol_req(struct hci_dev *hdev, unsigned long opt) +static void hci_linkpol_req(struct hci_request *req, unsigned long opt) { __le16 policy = cpu_to_le16(opt); - BT_DBG("%s %x", hdev->name, policy); + BT_DBG("%s %x", req->hdev->name, policy); /* Default link policy */ - hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy); + hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy); } /* Get HCI device by index. @@ -512,9 +800,10 @@ static int inquiry_cache_dump(struct hci_dev *hdev, int num, __u8 *buf) return copied; } -static void hci_inq_req(struct hci_dev *hdev, unsigned long opt) +static void hci_inq_req(struct hci_request *req, unsigned long opt) { struct hci_inquiry_req *ir = (struct hci_inquiry_req *) opt; + struct hci_dev *hdev = req->hdev; struct hci_cp_inquiry cp; BT_DBG("%s", hdev->name); @@ -526,7 +815,7 @@ static void hci_inq_req(struct hci_dev *hdev, unsigned long opt) memcpy(&cp.lap, &ir->lap, 3); cp.length = ir->length; cp.num_rsp = ir->num_rsp; - hci_send_cmd(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); } int hci_inquiry(void __user *arg) @@ -556,7 +845,8 @@ int hci_inquiry(void __user *arg) timeo = ir.length * msecs_to_jiffies(2000); if (do_inquiry) { - err = hci_request(hdev, hci_inq_req, (unsigned long)&ir, timeo); + err = hci_req_sync(hdev, hci_inq_req, (unsigned long) &ir, + timeo); if (err < 0) goto done; } @@ -654,39 +944,29 @@ static u8 create_ad(struct hci_dev *hdev, u8 *ptr) return ad_len; } -int hci_update_ad(struct hci_dev *hdev) +void hci_update_ad(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; struct hci_cp_le_set_adv_data cp; u8 len; - int err; - hci_dev_lock(hdev); - - if (!lmp_le_capable(hdev)) { - err = -EINVAL; - goto unlock; - } + if (!lmp_le_capable(hdev)) + return; memset(&cp, 0, sizeof(cp)); len = create_ad(hdev, cp.data); if (hdev->adv_data_len == len && - memcmp(cp.data, hdev->adv_data, len) == 0) { - err = 0; - goto unlock; - } + memcmp(cp.data, hdev->adv_data, len) == 0) + return; memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); hdev->adv_data_len = len; cp.length = len; - err = hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); -unlock: - hci_dev_unlock(hdev); - - return err; + hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); } /* ---- HCI ioctl helpers ---- */ @@ -735,10 +1015,7 @@ int hci_dev_open(__u16 dev) if (!test_bit(HCI_RAW, &hdev->flags)) { atomic_set(&hdev->cmd_cnt, 1); set_bit(HCI_INIT, &hdev->flags); - hdev->init_last_cmd = 0; - - ret = __hci_request(hdev, hci_init_req, 0, HCI_INIT_TIMEOUT); - + ret = __hci_init(hdev); clear_bit(HCI_INIT, &hdev->flags); } @@ -746,7 +1023,6 @@ int hci_dev_open(__u16 dev) hci_dev_hold(hdev); set_bit(HCI_UP, &hdev->flags); hci_notify(hdev, HCI_DEV_UP); - hci_update_ad(hdev); if (!test_bit(HCI_SETUP, &hdev->dev_flags) && mgmt_valid_hdev(hdev)) { hci_dev_lock(hdev); @@ -828,7 +1104,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) if (!test_bit(HCI_RAW, &hdev->flags) && test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { set_bit(HCI_INIT, &hdev->flags); - __hci_request(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT); + __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT); clear_bit(HCI_INIT, &hdev->flags); } @@ -851,6 +1127,10 @@ static int hci_dev_do_close(struct hci_dev *hdev) * and no tasks are scheduled. */ hdev->close(hdev); + /* Clear flags */ + hdev->flags = 0; + hdev->dev_flags &= ~HCI_PERSISTENT_MASK; + if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags) && mgmt_valid_hdev(hdev)) { hci_dev_lock(hdev); @@ -858,9 +1138,6 @@ static int hci_dev_do_close(struct hci_dev *hdev) hci_dev_unlock(hdev); } - /* Clear flags */ - hdev->flags = 0; - /* Controller radio is available but is currently powered down */ hdev->amp_status = 0; @@ -921,7 +1198,7 @@ int hci_dev_reset(__u16 dev) hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0; if (!test_bit(HCI_RAW, &hdev->flags)) - ret = __hci_request(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); + ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); done: hci_req_unlock(hdev); @@ -960,8 +1237,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) switch (cmd) { case HCISETAUTH: - err = hci_request(hdev, hci_auth_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt, + HCI_INIT_TIMEOUT); break; case HCISETENCRYPT: @@ -972,24 +1249,24 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) if (!test_bit(HCI_AUTH, &hdev->flags)) { /* Auth must be enabled first */ - err = hci_request(hdev, hci_auth_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt, + HCI_INIT_TIMEOUT); if (err) break; } - err = hci_request(hdev, hci_encrypt_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + err = hci_req_sync(hdev, hci_encrypt_req, dr.dev_opt, + HCI_INIT_TIMEOUT); break; case HCISETSCAN: - err = hci_request(hdev, hci_scan_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + err = hci_req_sync(hdev, hci_scan_req, dr.dev_opt, + HCI_INIT_TIMEOUT); break; case HCISETLINKPOL: - err = hci_request(hdev, hci_linkpol_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + err = hci_req_sync(hdev, hci_linkpol_req, dr.dev_opt, + HCI_INIT_TIMEOUT); break; case HCISETLINKMODE: @@ -1566,7 +1843,7 @@ int hci_blacklist_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) return mgmt_device_unblocked(hdev, bdaddr, type); } -static void le_scan_param_req(struct hci_dev *hdev, unsigned long opt) +static void le_scan_param_req(struct hci_request *req, unsigned long opt) { struct le_scan_params *param = (struct le_scan_params *) opt; struct hci_cp_le_set_scan_param cp; @@ -1576,10 +1853,10 @@ static void le_scan_param_req(struct hci_dev *hdev, unsigned long opt) cp.interval = cpu_to_le16(param->interval); cp.window = cpu_to_le16(param->window); - hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp); } -static void le_scan_enable_req(struct hci_dev *hdev, unsigned long opt) +static void le_scan_enable_req(struct hci_request *req, unsigned long opt) { struct hci_cp_le_set_scan_enable cp; @@ -1587,7 +1864,7 @@ static void le_scan_enable_req(struct hci_dev *hdev, unsigned long opt) cp.enable = 1; cp.filter_dup = 1; - hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); } static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval, @@ -1608,10 +1885,10 @@ static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval, hci_req_lock(hdev); - err = __hci_request(hdev, le_scan_param_req, (unsigned long) ¶m, - timeo); + err = __hci_req_sync(hdev, le_scan_param_req, (unsigned long) ¶m, + timeo); if (!err) - err = __hci_request(hdev, le_scan_enable_req, 0, timeo); + err = __hci_req_sync(hdev, le_scan_enable_req, 0, timeo); hci_req_unlock(hdev); @@ -2160,20 +2437,55 @@ static int hci_send_frame(struct sk_buff *skb) return hdev->send(skb); } -/* Send HCI command */ -int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) +void hci_req_init(struct hci_request *req, struct hci_dev *hdev) +{ + skb_queue_head_init(&req->cmd_q); + req->hdev = hdev; + req->err = 0; +} + +int hci_req_run(struct hci_request *req, hci_req_complete_t complete) +{ + struct hci_dev *hdev = req->hdev; + struct sk_buff *skb; + unsigned long flags; + + BT_DBG("length %u", skb_queue_len(&req->cmd_q)); + + /* If an error occured during request building, remove all HCI + * commands queued on the HCI request queue. + */ + if (req->err) { + skb_queue_purge(&req->cmd_q); + return req->err; + } + + /* Do not allow empty requests */ + if (skb_queue_empty(&req->cmd_q)) + return -ENODATA; + + skb = skb_peek_tail(&req->cmd_q); + bt_cb(skb)->req.complete = complete; + + spin_lock_irqsave(&hdev->cmd_q.lock, flags); + skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); + spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); + + queue_work(hdev->workqueue, &hdev->cmd_work); + + return 0; +} + +static struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, + u32 plen, void *param) { int len = HCI_COMMAND_HDR_SIZE + plen; struct hci_command_hdr *hdr; struct sk_buff *skb; - BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); - skb = bt_skb_alloc(len, GFP_ATOMIC); - if (!skb) { - BT_ERR("%s no memory for command", hdev->name); - return -ENOMEM; - } + if (!skb) + return NULL; hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE); hdr->opcode = cpu_to_le16(opcode); @@ -2187,8 +2499,26 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; skb->dev = (void *) hdev; - if (test_bit(HCI_INIT, &hdev->flags)) - hdev->init_last_cmd = opcode; + return skb; +} + +/* Send HCI command */ +int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) +{ + struct sk_buff *skb; + + BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); + + skb = hci_prepare_cmd(hdev, opcode, plen, param); + if (!skb) { + BT_ERR("%s no memory for command", hdev->name); + return -ENOMEM; + } + + /* Stand-alone HCI commands must be flaged as + * single-command requests. + */ + bt_cb(skb)->req.start = true; skb_queue_tail(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); @@ -2196,6 +2526,34 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) return 0; } +/* Queue a command to an asynchronous HCI request */ +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param) +{ + struct hci_dev *hdev = req->hdev; + struct sk_buff *skb; + + BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); + + /* If an error occured during request building, there is no point in + * queueing the HCI command. We can simply return. + */ + if (req->err) + return; + + skb = hci_prepare_cmd(hdev, opcode, plen, param); + if (!skb) { + BT_ERR("%s no memory for command (opcode 0x%4.4x)", + hdev->name, opcode); + req->err = -ENOMEM; + return; + } + + if (skb_queue_empty(&req->cmd_q)) + bt_cb(skb)->req.start = true; + + skb_queue_tail(&req->cmd_q, skb); +} + /* Get data from the previously sent command */ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) { @@ -2398,7 +2756,7 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type) if (c->type == type && c->sent) { BT_ERR("%s killing stalled connection %pMR", hdev->name, &c->dst); - hci_acl_disconn(c, HCI_ERROR_REMOTE_USER_TERM); + hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM); } } @@ -2860,6 +3218,123 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(skb); } +static bool hci_req_is_complete(struct hci_dev *hdev) +{ + struct sk_buff *skb; + + skb = skb_peek(&hdev->cmd_q); + if (!skb) + return true; + + return bt_cb(skb)->req.start; +} + +static void hci_resend_last(struct hci_dev *hdev) +{ + struct hci_command_hdr *sent; + struct sk_buff *skb; + u16 opcode; + + if (!hdev->sent_cmd) + return; + + sent = (void *) hdev->sent_cmd->data; + opcode = __le16_to_cpu(sent->opcode); + if (opcode == HCI_OP_RESET) + return; + + skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); + if (!skb) + return; + + skb_queue_head(&hdev->cmd_q, skb); + queue_work(hdev->workqueue, &hdev->cmd_work); +} + +void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) +{ + hci_req_complete_t req_complete = NULL; + struct sk_buff *skb; + unsigned long flags; + + BT_DBG("opcode 0x%04x status 0x%02x", opcode, status); + + /* If the completed command doesn't match the last one that was + * sent we need to do special handling of it. + */ + if (!hci_sent_cmd_data(hdev, opcode)) { + /* Some CSR based controllers generate a spontaneous + * reset complete event during init and any pending + * command will never be completed. In such a case we + * need to resend whatever was the last sent + * command. + */ + if (test_bit(HCI_INIT, &hdev->flags) && opcode == HCI_OP_RESET) + hci_resend_last(hdev); + + return; + } + + /* If the command succeeded and there's still more commands in + * this request the request is not yet complete. + */ + if (!status && !hci_req_is_complete(hdev)) + return; + + /* If this was the last command in a request the complete + * callback would be found in hdev->sent_cmd instead of the + * command queue (hdev->cmd_q). + */ + if (hdev->sent_cmd) { + req_complete = bt_cb(hdev->sent_cmd)->req.complete; + if (req_complete) + goto call_complete; + } + + /* Remove all pending commands belonging to this request */ + spin_lock_irqsave(&hdev->cmd_q.lock, flags); + while ((skb = __skb_dequeue(&hdev->cmd_q))) { + if (bt_cb(skb)->req.start) { + __skb_queue_head(&hdev->cmd_q, skb); + break; + } + + req_complete = bt_cb(skb)->req.complete; + kfree_skb(skb); + } + spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); + +call_complete: + if (req_complete) + req_complete(hdev, status); +} + +void hci_req_cmd_status(struct hci_dev *hdev, u16 opcode, u8 status) +{ + hci_req_complete_t req_complete = NULL; + + BT_DBG("opcode 0x%04x status 0x%02x", opcode, status); + + if (status) { + hci_req_cmd_complete(hdev, opcode, status); + return; + } + + /* No need to handle success status if there are more commands */ + if (!hci_req_is_complete(hdev)) + return; + + if (hdev->sent_cmd) + req_complete = bt_cb(hdev->sent_cmd)->req.complete; + + /* If the request doesn't have a complete callback or there + * are other commands/requests in the hdev queue we consider + * this request as completed. + */ + if (!req_complete || !skb_queue_empty(&hdev->cmd_q)) + hci_req_cmd_complete(hdev, opcode, status); +} + static void hci_rx_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, rx_work); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 477726a63512..138580745c2c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -53,7 +53,7 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); hci_dev_unlock(hdev); - hci_req_complete(hdev, HCI_OP_INQUIRY_CANCEL, status); + hci_req_cmd_complete(hdev, HCI_OP_INQUIRY, status); hci_conn_check_pending(hdev); } @@ -183,8 +183,6 @@ static void hci_cc_write_def_link_policy(struct hci_dev *hdev, if (!status) hdev->link_policy = get_unaligned_le16(sent); - - hci_req_complete(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, status); } static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) @@ -195,11 +193,8 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_RESET, &hdev->flags); - hci_req_complete(hdev, HCI_OP_RESET, status); - /* Reset all non-persistent flags */ - hdev->dev_flags &= ~(BIT(HCI_LE_SCAN) | BIT(HCI_PENDING_CLASS) | - BIT(HCI_PERIODIC_INQ)); + hdev->dev_flags &= ~HCI_PERSISTENT_MASK; hdev->discovery.state = DISCOVERY_STOPPED; hdev->inq_tx_power = HCI_TX_POWER_INVALID; @@ -228,11 +223,6 @@ static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) memcpy(hdev->dev_name, sent, HCI_MAX_NAME_LENGTH); hci_dev_unlock(hdev); - - if (!status && !test_bit(HCI_INIT, &hdev->flags)) - hci_update_ad(hdev); - - hci_req_complete(hdev, HCI_OP_WRITE_LOCAL_NAME, status); } static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb) @@ -270,8 +260,6 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb) if (test_bit(HCI_MGMT, &hdev->dev_flags)) mgmt_auth_enable_complete(hdev, status); - - hci_req_complete(hdev, HCI_OP_WRITE_AUTH_ENABLE, status); } static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) @@ -293,8 +281,6 @@ static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) else clear_bit(HCI_ENCRYPT, &hdev->flags); } - - hci_req_complete(hdev, HCI_OP_WRITE_ENCRYPT_MODE, status); } static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) @@ -343,7 +329,6 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) done: hci_dev_unlock(hdev); - hci_req_complete(hdev, HCI_OP_WRITE_SCAN_ENABLE, status); } static void hci_cc_read_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb) @@ -435,15 +420,6 @@ static void hci_cc_write_voice_setting(struct hci_dev *hdev, hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING); } -static void hci_cc_host_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) -{ - __u8 status = *((__u8 *) skb->data); - - BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_HOST_BUFFER_SIZE, status); -} - static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) { __u8 status = *((__u8 *) skb->data); @@ -472,211 +448,6 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) } } -static u8 hci_get_inquiry_mode(struct hci_dev *hdev) -{ - if (lmp_ext_inq_capable(hdev)) - return 2; - - if (lmp_inq_rssi_capable(hdev)) - return 1; - - if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 && - hdev->lmp_subver == 0x0757) - return 1; - - if (hdev->manufacturer == 15) { - if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963) - return 1; - if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963) - return 1; - if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965) - return 1; - } - - if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 && - hdev->lmp_subver == 0x1805) - return 1; - - return 0; -} - -static void hci_setup_inquiry_mode(struct hci_dev *hdev) -{ - u8 mode; - - mode = hci_get_inquiry_mode(hdev); - - hci_send_cmd(hdev, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); -} - -static void hci_setup_event_mask(struct hci_dev *hdev) -{ - /* The second byte is 0xff instead of 0x9f (two reserved bits - * disabled) since a Broadcom 1.2 dongle doesn't respond to the - * command otherwise */ - u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 }; - - /* CSR 1.1 dongles does not accept any bitfield so don't try to set - * any event mask for pre 1.2 devices */ - if (hdev->hci_ver < BLUETOOTH_VER_1_2) - return; - - if (lmp_bredr_capable(hdev)) { - events[4] |= 0x01; /* Flow Specification Complete */ - events[4] |= 0x02; /* Inquiry Result with RSSI */ - events[4] |= 0x04; /* Read Remote Extended Features Complete */ - events[5] |= 0x08; /* Synchronous Connection Complete */ - events[5] |= 0x10; /* Synchronous Connection Changed */ - } - - if (lmp_inq_rssi_capable(hdev)) - events[4] |= 0x02; /* Inquiry Result with RSSI */ - - if (lmp_sniffsubr_capable(hdev)) - events[5] |= 0x20; /* Sniff Subrating */ - - if (lmp_pause_enc_capable(hdev)) - events[5] |= 0x80; /* Encryption Key Refresh Complete */ - - if (lmp_ext_inq_capable(hdev)) - events[5] |= 0x40; /* Extended Inquiry Result */ - - if (lmp_no_flush_capable(hdev)) - events[7] |= 0x01; /* Enhanced Flush Complete */ - - if (lmp_lsto_capable(hdev)) - events[6] |= 0x80; /* Link Supervision Timeout Changed */ - - if (lmp_ssp_capable(hdev)) { - events[6] |= 0x01; /* IO Capability Request */ - events[6] |= 0x02; /* IO Capability Response */ - events[6] |= 0x04; /* User Confirmation Request */ - events[6] |= 0x08; /* User Passkey Request */ - events[6] |= 0x10; /* Remote OOB Data Request */ - events[6] |= 0x20; /* Simple Pairing Complete */ - events[7] |= 0x04; /* User Passkey Notification */ - events[7] |= 0x08; /* Keypress Notification */ - events[7] |= 0x10; /* Remote Host Supported - * Features Notification */ - } - - if (lmp_le_capable(hdev)) - events[7] |= 0x20; /* LE Meta-Event */ - - hci_send_cmd(hdev, HCI_OP_SET_EVENT_MASK, sizeof(events), events); - - if (lmp_le_capable(hdev)) { - memset(events, 0, sizeof(events)); - events[0] = 0x1f; - hci_send_cmd(hdev, HCI_OP_LE_SET_EVENT_MASK, - sizeof(events), events); - } -} - -static void bredr_setup(struct hci_dev *hdev) -{ - struct hci_cp_delete_stored_link_key cp; - __le16 param; - __u8 flt_type; - - /* Read Buffer Size (ACL mtu, max pkt, etc.) */ - hci_send_cmd(hdev, HCI_OP_READ_BUFFER_SIZE, 0, NULL); - - /* Read Class of Device */ - hci_send_cmd(hdev, HCI_OP_READ_CLASS_OF_DEV, 0, NULL); - - /* Read Local Name */ - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_NAME, 0, NULL); - - /* Read Voice Setting */ - hci_send_cmd(hdev, HCI_OP_READ_VOICE_SETTING, 0, NULL); - - /* Clear Event Filters */ - flt_type = HCI_FLT_CLEAR_ALL; - hci_send_cmd(hdev, HCI_OP_SET_EVENT_FLT, 1, &flt_type); - - /* Connection accept timeout ~20 secs */ - param = __constant_cpu_to_le16(0x7d00); - hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, ¶m); - - bacpy(&cp.bdaddr, BDADDR_ANY); - cp.delete_all = 1; - hci_send_cmd(hdev, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp); -} - -static void le_setup(struct hci_dev *hdev) -{ - /* Read LE Buffer Size */ - hci_send_cmd(hdev, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL); - - /* Read LE Local Supported Features */ - hci_send_cmd(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL); - - /* Read LE Advertising Channel TX Power */ - hci_send_cmd(hdev, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); - - /* Read LE White List Size */ - hci_send_cmd(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL); - - /* Read LE Supported States */ - hci_send_cmd(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL); -} - -static void hci_setup(struct hci_dev *hdev) -{ - if (hdev->dev_type != HCI_BREDR) - return; - - /* Read BD Address */ - hci_send_cmd(hdev, HCI_OP_READ_BD_ADDR, 0, NULL); - - if (lmp_bredr_capable(hdev)) - bredr_setup(hdev); - - if (lmp_le_capable(hdev)) - le_setup(hdev); - - hci_setup_event_mask(hdev); - - if (hdev->hci_ver > BLUETOOTH_VER_1_1) - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); - - if (lmp_ssp_capable(hdev)) { - if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { - u8 mode = 0x01; - hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, - sizeof(mode), &mode); - } else { - struct hci_cp_write_eir cp; - - memset(hdev->eir, 0, sizeof(hdev->eir)); - memset(&cp, 0, sizeof(cp)); - - hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp); - } - } - - if (lmp_inq_rssi_capable(hdev)) - hci_setup_inquiry_mode(hdev); - - if (lmp_inq_tx_pwr_capable(hdev)) - hci_send_cmd(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL); - - if (lmp_ext_feat_capable(hdev)) { - struct hci_cp_read_local_ext_features cp; - - cp.page = 0x01; - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, sizeof(cp), - &cp); - } - - if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) { - u8 enable = 1; - hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable), - &enable); - } -} - static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_rp_read_local_version *rp = (void *) skb->data; @@ -684,7 +455,7 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - goto done; + return; hdev->hci_ver = rp->hci_ver; hdev->hci_rev = __le16_to_cpu(rp->hci_rev); @@ -694,30 +465,6 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s manufacturer 0x%4.4x hci ver %d:%d", hdev->name, hdev->manufacturer, hdev->hci_ver, hdev->hci_rev); - - if (test_bit(HCI_INIT, &hdev->flags)) - hci_setup(hdev); - -done: - hci_req_complete(hdev, HCI_OP_READ_LOCAL_VERSION, rp->status); -} - -static void hci_setup_link_policy(struct hci_dev *hdev) -{ - struct hci_cp_write_def_link_policy cp; - u16 link_policy = 0; - - if (lmp_rswitch_capable(hdev)) - link_policy |= HCI_LP_RSWITCH; - if (lmp_hold_capable(hdev)) - link_policy |= HCI_LP_HOLD; - if (lmp_sniff_capable(hdev)) - link_policy |= HCI_LP_SNIFF; - if (lmp_park_capable(hdev)) - link_policy |= HCI_LP_PARK; - - cp.policy = cpu_to_le16(link_policy); - hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp); } static void hci_cc_read_local_commands(struct hci_dev *hdev, @@ -727,16 +474,8 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (rp->status) - goto done; - - memcpy(hdev->commands, rp->commands, sizeof(hdev->commands)); - - if (test_bit(HCI_INIT, &hdev->flags) && (hdev->commands[5] & 0x10)) - hci_setup_link_policy(hdev); - -done: - hci_req_complete(hdev, HCI_OP_READ_LOCAL_COMMANDS, rp->status); + if (!rp->status) + memcpy(hdev->commands, rp->commands, sizeof(hdev->commands)); } static void hci_cc_read_local_features(struct hci_dev *hdev, @@ -795,22 +534,6 @@ static void hci_cc_read_local_features(struct hci_dev *hdev, hdev->features[6], hdev->features[7]); } -static void hci_set_le_support(struct hci_dev *hdev) -{ - struct hci_cp_write_le_host_supported cp; - - memset(&cp, 0, sizeof(cp)); - - if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { - cp.le = 1; - cp.simul = lmp_le_br_capable(hdev); - } - - if (cp.le != lmp_host_le_capable(hdev)) - hci_send_cmd(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp), - &cp); -} - static void hci_cc_read_local_ext_features(struct hci_dev *hdev, struct sk_buff *skb) { @@ -819,7 +542,7 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - goto done; + return; switch (rp->page) { case 0: @@ -829,12 +552,6 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev, memcpy(hdev->host_features, rp->features, 8); break; } - - if (test_bit(HCI_INIT, &hdev->flags) && lmp_le_capable(hdev)) - hci_set_le_support(hdev); - -done: - hci_req_complete(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, rp->status); } static void hci_cc_read_flow_control_mode(struct hci_dev *hdev, @@ -844,12 +561,8 @@ static void hci_cc_read_flow_control_mode(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (rp->status) - return; - - hdev->flow_ctl_mode = rp->mode; - - hci_req_complete(hdev, HCI_OP_READ_FLOW_CONTROL_MODE, rp->status); + if (!rp->status) + hdev->flow_ctl_mode = rp->mode; } static void hci_cc_read_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) @@ -886,8 +599,65 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb) if (!rp->status) bacpy(&hdev->bdaddr, &rp->bdaddr); +} + +static void hci_cc_read_page_scan_activity(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_read_page_scan_activity *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (test_bit(HCI_INIT, &hdev->flags) && !rp->status) { + hdev->page_scan_interval = __le16_to_cpu(rp->interval); + hdev->page_scan_window = __le16_to_cpu(rp->window); + } +} + +static void hci_cc_write_page_scan_activity(struct hci_dev *hdev, + struct sk_buff *skb) +{ + u8 status = *((u8 *) skb->data); + struct hci_cp_write_page_scan_activity *sent; + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (status) + return; + + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY); + if (!sent) + return; + + hdev->page_scan_interval = __le16_to_cpu(sent->interval); + hdev->page_scan_window = __le16_to_cpu(sent->window); +} + +static void hci_cc_read_page_scan_type(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_read_page_scan_type *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (test_bit(HCI_INIT, &hdev->flags) && !rp->status) + hdev->page_scan_type = rp->type; +} + +static void hci_cc_write_page_scan_type(struct hci_dev *hdev, + struct sk_buff *skb) +{ + u8 status = *((u8 *) skb->data); + u8 *type; - hci_req_complete(hdev, HCI_OP_READ_BD_ADDR, rp->status); + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (status) + return; + + type = hci_sent_cmd_data(hdev, HCI_OP_WRITE_PAGE_SCAN_TYPE); + if (type) + hdev->page_scan_type = *type; } static void hci_cc_read_data_block_size(struct hci_dev *hdev, @@ -908,17 +678,6 @@ static void hci_cc_read_data_block_size(struct hci_dev *hdev, BT_DBG("%s blk mtu %d cnt %d len %d", hdev->name, hdev->block_mtu, hdev->block_cnt, hdev->block_len); - - hci_req_complete(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, rp->status); -} - -static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb) -{ - __u8 status = *((__u8 *) skb->data); - - BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_WRITE_CA_TIMEOUT, status); } static void hci_cc_read_local_amp_info(struct hci_dev *hdev, @@ -942,8 +701,6 @@ static void hci_cc_read_local_amp_info(struct hci_dev *hdev, hdev->amp_be_flush_to = __le32_to_cpu(rp->be_flush_to); hdev->amp_max_flush_to = __le32_to_cpu(rp->max_flush_to); - hci_req_complete(hdev, HCI_OP_READ_LOCAL_AMP_INFO, rp->status); - a2mp_rsp: a2mp_send_getinfo_rsp(hdev); } @@ -985,35 +742,6 @@ a2mp_rsp: a2mp_send_create_phy_link_req(hdev, rp->status); } -static void hci_cc_delete_stored_link_key(struct hci_dev *hdev, - struct sk_buff *skb) -{ - __u8 status = *((__u8 *) skb->data); - - BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_DELETE_STORED_LINK_KEY, status); -} - -static void hci_cc_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb) -{ - __u8 status = *((__u8 *) skb->data); - - BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_SET_EVENT_MASK, status); -} - -static void hci_cc_write_inquiry_mode(struct hci_dev *hdev, - struct sk_buff *skb) -{ - __u8 status = *((__u8 *) skb->data); - - BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_WRITE_INQUIRY_MODE, status); -} - static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, struct sk_buff *skb) { @@ -1023,17 +751,6 @@ static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, if (!rp->status) hdev->inq_tx_power = rp->tx_power; - - hci_req_complete(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, rp->status); -} - -static void hci_cc_set_event_flt(struct hci_dev *hdev, struct sk_buff *skb) -{ - __u8 status = *((__u8 *) skb->data); - - BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_SET_EVENT_FLT, status); } static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb) @@ -1095,8 +812,6 @@ static void hci_cc_le_read_buffer_size(struct hci_dev *hdev, hdev->le_cnt = hdev->le_pkts; BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts); - - hci_req_complete(hdev, HCI_OP_LE_READ_BUFFER_SIZE, rp->status); } static void hci_cc_le_read_local_features(struct hci_dev *hdev, @@ -1108,8 +823,6 @@ static void hci_cc_le_read_local_features(struct hci_dev *hdev, if (!rp->status) memcpy(hdev->le_features, rp->features, 8); - - hci_req_complete(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, rp->status); } static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev, @@ -1119,22 +832,8 @@ static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (!rp->status) { + if (!rp->status) hdev->adv_tx_power = rp->tx_power; - if (!test_bit(HCI_INIT, &hdev->flags)) - hci_update_ad(hdev); - } - - hci_req_complete(hdev, HCI_OP_LE_READ_ADV_TX_POWER, rp->status); -} - -static void hci_cc_le_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb) -{ - __u8 status = *((__u8 *) skb->data); - - BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_LE_SET_EVENT_MASK, status); } static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb) @@ -1231,12 +930,15 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags); } - hci_dev_unlock(hdev); + if (!test_bit(HCI_INIT, &hdev->flags)) { + struct hci_request req; - if (!test_bit(HCI_INIT, &hdev->flags)) - hci_update_ad(hdev); + hci_req_init(&req, hdev); + hci_update_ad(&req); + hci_req_run(&req, NULL); + } - hci_req_complete(hdev, HCI_OP_LE_SET_ADV_ENABLE, status); + hci_dev_unlock(hdev); } static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) @@ -1245,8 +947,6 @@ static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, status); - hci_req_complete(hdev, HCI_OP_LE_SET_SCAN_PARAM, status); - if (status) { hci_dev_lock(hdev); mgmt_start_discovery_failed(hdev, status); @@ -1269,8 +969,6 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, switch (cp->enable) { case LE_SCANNING_ENABLED: - hci_req_complete(hdev, HCI_OP_LE_SET_SCAN_ENABLE, status); - if (status) { hci_dev_lock(hdev); mgmt_start_discovery_failed(hdev, status); @@ -1321,32 +1019,6 @@ static void hci_cc_le_read_white_list_size(struct hci_dev *hdev, if (!rp->status) hdev->le_white_list_size = rp->size; - - hci_req_complete(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, rp->status); -} - -static void hci_cc_le_ltk_reply(struct hci_dev *hdev, struct sk_buff *skb) -{ - struct hci_rp_le_ltk_reply *rp = (void *) skb->data; - - BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - - if (rp->status) - return; - - hci_req_complete(hdev, HCI_OP_LE_LTK_REPLY, rp->status); -} - -static void hci_cc_le_ltk_neg_reply(struct hci_dev *hdev, struct sk_buff *skb) -{ - struct hci_rp_le_ltk_neg_reply *rp = (void *) skb->data; - - BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - - if (rp->status) - return; - - hci_req_complete(hdev, HCI_OP_LE_LTK_NEG_REPLY, rp->status); } static void hci_cc_le_read_supported_states(struct hci_dev *hdev, @@ -1358,8 +1030,6 @@ static void hci_cc_le_read_supported_states(struct hci_dev *hdev, if (!rp->status) memcpy(hdev->le_states, rp->le_states, 8); - - hci_req_complete(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, rp->status); } static void hci_cc_write_le_host_supported(struct hci_dev *hdev, @@ -1389,8 +1059,6 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev, if (test_bit(HCI_MGMT, &hdev->dev_flags) && !test_bit(HCI_INIT, &hdev->flags)) mgmt_le_enable_complete(hdev, sent->le, status); - - hci_req_complete(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, status); } static void hci_cc_write_remote_amp_assoc(struct hci_dev *hdev, @@ -1412,7 +1080,6 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) BT_DBG("%s status 0x%2.2x", hdev->name, status); if (status) { - hci_req_complete(hdev, HCI_OP_INQUIRY, status); hci_conn_check_pending(hdev); hci_dev_lock(hdev); if (test_bit(HCI_MGMT, &hdev->dev_flags)) @@ -1884,11 +1551,6 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status) } } -static void hci_cs_le_start_enc(struct hci_dev *hdev, u8 status) -{ - BT_DBG("%s status 0x%2.2x", hdev->name, status); -} - static void hci_cs_create_phylink(struct hci_dev *hdev, u8 status) { struct hci_cp_create_phy_link *cp; @@ -1930,11 +1592,6 @@ static void hci_cs_accept_phylink(struct hci_dev *hdev, u8 status) amp_write_remote_assoc(hdev, cp->phy_handle); } -static void hci_cs_create_logical_link(struct hci_dev *hdev, u8 status) -{ - BT_DBG("%s status 0x%2.2x", hdev->name, status); -} - static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { __u8 status = *((__u8 *) skb->data); @@ -1943,7 +1600,7 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, status); - hci_req_complete(hdev, HCI_OP_INQUIRY, status); + hci_req_cmd_complete(hdev, HCI_OP_INQUIRY, status); hci_conn_check_pending(hdev); @@ -2399,7 +2056,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); if (ev->status && conn->state == BT_CONNECTED) { - hci_acl_disconn(conn, HCI_ERROR_AUTH_FAILURE); + hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); hci_conn_put(conn); goto unlock; } @@ -2491,20 +2148,10 @@ unlock: hci_dev_unlock(hdev); } -static void hci_remote_version_evt(struct hci_dev *hdev, struct sk_buff *skb) -{ - BT_DBG("%s", hdev->name); -} - -static void hci_qos_setup_complete_evt(struct hci_dev *hdev, - struct sk_buff *skb) -{ - BT_DBG("%s", hdev->name); -} - static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_cmd_complete *ev = (void *) skb->data; + u8 status = skb->data[sizeof(*ev)]; __u16 opcode; skb_pull(skb, sizeof(*ev)); @@ -2588,10 +2235,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_write_voice_setting(hdev, skb); break; - case HCI_OP_HOST_BUFFER_SIZE: - hci_cc_host_buffer_size(hdev, skb); - break; - case HCI_OP_WRITE_SSP_MODE: hci_cc_write_ssp_mode(hdev, skb); break; @@ -2620,46 +2263,42 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_read_bd_addr(hdev, skb); break; - case HCI_OP_READ_DATA_BLOCK_SIZE: - hci_cc_read_data_block_size(hdev, skb); + case HCI_OP_READ_PAGE_SCAN_ACTIVITY: + hci_cc_read_page_scan_activity(hdev, skb); break; - case HCI_OP_WRITE_CA_TIMEOUT: - hci_cc_write_ca_timeout(hdev, skb); + case HCI_OP_WRITE_PAGE_SCAN_ACTIVITY: + hci_cc_write_page_scan_activity(hdev, skb); break; - case HCI_OP_READ_FLOW_CONTROL_MODE: - hci_cc_read_flow_control_mode(hdev, skb); + case HCI_OP_READ_PAGE_SCAN_TYPE: + hci_cc_read_page_scan_type(hdev, skb); break; - case HCI_OP_READ_LOCAL_AMP_INFO: - hci_cc_read_local_amp_info(hdev, skb); + case HCI_OP_WRITE_PAGE_SCAN_TYPE: + hci_cc_write_page_scan_type(hdev, skb); break; - case HCI_OP_READ_LOCAL_AMP_ASSOC: - hci_cc_read_local_amp_assoc(hdev, skb); + case HCI_OP_READ_DATA_BLOCK_SIZE: + hci_cc_read_data_block_size(hdev, skb); break; - case HCI_OP_DELETE_STORED_LINK_KEY: - hci_cc_delete_stored_link_key(hdev, skb); + case HCI_OP_READ_FLOW_CONTROL_MODE: + hci_cc_read_flow_control_mode(hdev, skb); break; - case HCI_OP_SET_EVENT_MASK: - hci_cc_set_event_mask(hdev, skb); + case HCI_OP_READ_LOCAL_AMP_INFO: + hci_cc_read_local_amp_info(hdev, skb); break; - case HCI_OP_WRITE_INQUIRY_MODE: - hci_cc_write_inquiry_mode(hdev, skb); + case HCI_OP_READ_LOCAL_AMP_ASSOC: + hci_cc_read_local_amp_assoc(hdev, skb); break; case HCI_OP_READ_INQ_RSP_TX_POWER: hci_cc_read_inq_rsp_tx_power(hdev, skb); break; - case HCI_OP_SET_EVENT_FLT: - hci_cc_set_event_flt(hdev, skb); - break; - case HCI_OP_PIN_CODE_REPLY: hci_cc_pin_code_reply(hdev, skb); break; @@ -2684,10 +2323,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_le_read_adv_tx_power(hdev, skb); break; - case HCI_OP_LE_SET_EVENT_MASK: - hci_cc_le_set_event_mask(hdev, skb); - break; - case HCI_OP_USER_CONFIRM_REPLY: hci_cc_user_confirm_reply(hdev, skb); break; @@ -2720,14 +2355,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_le_read_white_list_size(hdev, skb); break; - case HCI_OP_LE_LTK_REPLY: - hci_cc_le_ltk_reply(hdev, skb); - break; - - case HCI_OP_LE_LTK_NEG_REPLY: - hci_cc_le_ltk_neg_reply(hdev, skb); - break; - case HCI_OP_LE_READ_SUPPORTED_STATES: hci_cc_le_read_supported_states(hdev, skb); break; @@ -2745,9 +2372,11 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) break; } - if (ev->opcode != HCI_OP_NOP) + if (opcode != HCI_OP_NOP) del_timer(&hdev->cmd_timer); + hci_req_cmd_complete(hdev, opcode, status); + if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) @@ -2817,10 +2446,6 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cs_le_create_conn(hdev, ev->status); break; - case HCI_OP_LE_START_ENC: - hci_cs_le_start_enc(hdev, ev->status); - break; - case HCI_OP_CREATE_PHY_LINK: hci_cs_create_phylink(hdev, ev->status); break; @@ -2829,18 +2454,16 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cs_accept_phylink(hdev, ev->status); break; - case HCI_OP_CREATE_LOGICAL_LINK: - hci_cs_create_logical_link(hdev, ev->status); - break; - default: BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode); break; } - if (ev->opcode != HCI_OP_NOP) + if (opcode != HCI_OP_NOP) del_timer(&hdev->cmd_timer); + hci_req_cmd_status(hdev, opcode, ev->status); + if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) @@ -3391,18 +3014,6 @@ unlock: hci_dev_unlock(hdev); } -static void hci_sync_conn_changed_evt(struct hci_dev *hdev, struct sk_buff *skb) -{ - BT_DBG("%s", hdev->name); -} - -static void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *skb) -{ - struct hci_ev_sniff_subrate *ev = (void *) skb->data; - - BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); -} - static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) { @@ -3472,7 +3083,7 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev, clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); if (ev->status && conn->state == BT_CONNECTED) { - hci_acl_disconn(conn, HCI_ERROR_AUTH_FAILURE); + hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); hci_conn_put(conn); goto unlock; } @@ -4130,14 +3741,6 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_remote_features_evt(hdev, skb); break; - case HCI_EV_REMOTE_VERSION: - hci_remote_version_evt(hdev, skb); - break; - - case HCI_EV_QOS_SETUP_COMPLETE: - hci_qos_setup_complete_evt(hdev, skb); - break; - case HCI_EV_CMD_COMPLETE: hci_cmd_complete_evt(hdev, skb); break; @@ -4194,14 +3797,6 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_sync_conn_complete_evt(hdev, skb); break; - case HCI_EV_SYNC_CONN_CHANGED: - hci_sync_conn_changed_evt(hdev, skb); - break; - - case HCI_EV_SNIFF_SUBRATE: - hci_sniff_subrate_evt(hdev, skb); - break; - case HCI_EV_EXTENDED_INQUIRY_RESULT: hci_extended_inquiry_result_evt(hdev, skb); break; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 6a93614f2c49..aa4354fca77c 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -854,6 +854,11 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } else { + /* Stand-alone HCI commands must be flaged as + * single-command requests. + */ + bt_cb(skb)->req.start = true; + skb_queue_tail(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); } @@ -1121,8 +1126,6 @@ error: void hci_sock_cleanup(void) { bt_procfs_cleanup(&init_net, "hci"); - if (bt_sock_unregister(BTPROTO_HCI) < 0) - BT_ERR("HCI socket unregistration failed"); - + bt_sock_unregister(BTPROTO_HCI); proto_unregister(&hci_sk_proto); } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 23b4e242a31a..ff38561385de 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -590,10 +590,8 @@ int __init bt_sysfs_init(void) bt_debugfs = debugfs_create_dir("bluetooth", NULL); bt_class = class_create(THIS_MODULE, "bluetooth"); - if (IS_ERR(bt_class)) - return PTR_ERR(bt_class); - return 0; + return PTR_RET(bt_class); } void bt_sysfs_cleanup(void) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index a7352ff3fd1e..2342327f3335 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -311,6 +311,9 @@ static int hidp_get_raw_report(struct hid_device *hid, int numbered_reports = hid->report_enum[report_type].numbered; int ret; + if (atomic_read(&session->terminate)) + return -EIO; + switch (report_type) { case HID_FEATURE_REPORT: report_type = HIDP_TRANS_GET_REPORT | HIDP_DATA_RTYPE_FEATURE; @@ -722,6 +725,7 @@ static int hidp_session(void *arg) set_current_state(TASK_INTERRUPTIBLE); } set_current_state(TASK_RUNNING); + atomic_inc(&session->terminate); remove_wait_queue(sk_sleep(intr_sk), &intr_wait); remove_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait); diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 82a829d90b0f..5d0f1ca0a314 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -304,8 +304,6 @@ error: void __exit hidp_cleanup_sockets(void) { bt_procfs_cleanup(&init_net, "hidp"); - if (bt_sock_unregister(BTPROTO_HIDP) < 0) - BT_ERR("Can't unregister HIDP socket"); - + bt_sock_unregister(BTPROTO_HIDP); proto_unregister(&hidp_proto); } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 1bcfb8422fdc..7f9704993b74 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1312,8 +1312,6 @@ error: void l2cap_cleanup_sockets(void) { bt_procfs_cleanup(&init_net, "l2cap"); - if (bt_sock_unregister(BTPROTO_L2CAP) < 0) - BT_ERR("L2CAP socket unregistration failed"); - + bt_sock_unregister(BTPROTO_L2CAP); proto_unregister(&l2cap_proto); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 39395c7144aa..03e7e732215f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -384,7 +384,8 @@ static u32 get_supported_settings(struct hci_dev *hdev) if (lmp_bredr_capable(hdev)) { settings |= MGMT_SETTING_CONNECTABLE; - settings |= MGMT_SETTING_FAST_CONNECTABLE; + if (hdev->hci_ver >= BLUETOOTH_VER_1_2) + settings |= MGMT_SETTING_FAST_CONNECTABLE; settings |= MGMT_SETTING_DISCOVERABLE; settings |= MGMT_SETTING_BREDR; settings |= MGMT_SETTING_LINK_SECURITY; @@ -409,6 +410,9 @@ static u32 get_current_settings(struct hci_dev *hdev) if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) settings |= MGMT_SETTING_CONNECTABLE; + if (test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) + settings |= MGMT_SETTING_FAST_CONNECTABLE; + if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) settings |= MGMT_SETTING_DISCOVERABLE; @@ -591,32 +595,33 @@ static void create_eir(struct hci_dev *hdev, u8 *data) ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); } -static int update_eir(struct hci_dev *hdev) +static void update_eir(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; struct hci_cp_write_eir cp; if (!hdev_is_powered(hdev)) - return 0; + return; if (!lmp_ext_inq_capable(hdev)) - return 0; + return; if (!test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) - return 0; + return; if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) - return 0; + return; memset(&cp, 0, sizeof(cp)); create_eir(hdev, cp.data); if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0) - return 0; + return; memcpy(hdev->eir, cp.data, sizeof(cp.data)); - return hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp); } static u8 get_service_classes(struct hci_dev *hdev) @@ -630,47 +635,48 @@ static u8 get_service_classes(struct hci_dev *hdev) return val; } -static int update_class(struct hci_dev *hdev) +static void update_class(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; u8 cod[3]; - int err; BT_DBG("%s", hdev->name); if (!hdev_is_powered(hdev)) - return 0; + return; if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) - return 0; + return; cod[0] = hdev->minor_class; cod[1] = hdev->major_class; cod[2] = get_service_classes(hdev); if (memcmp(cod, hdev->dev_class, 3) == 0) - return 0; - - err = hci_send_cmd(hdev, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); - if (err == 0) - set_bit(HCI_PENDING_CLASS, &hdev->dev_flags); + return; - return err; + hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); } static void service_cache_off(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, service_cache.work); + struct hci_request req; if (!test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) return; + hci_req_init(&req, hdev); + hci_dev_lock(hdev); - update_eir(hdev); - update_class(hdev); + update_eir(&req); + update_class(&req); hci_dev_unlock(hdev); + + hci_req_run(&req, NULL); } static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev) @@ -994,11 +1000,64 @@ failed: return err; } +static void write_fast_connectable(struct hci_request *req, bool enable) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_write_page_scan_activity acp; + u8 type; + + if (hdev->hci_ver < BLUETOOTH_VER_1_2) + return; + + if (enable) { + type = PAGE_SCAN_TYPE_INTERLACED; + + /* 160 msec page scan interval */ + acp.interval = __constant_cpu_to_le16(0x0100); + } else { + type = PAGE_SCAN_TYPE_STANDARD; /* default */ + + /* default 1.28 sec page scan */ + acp.interval = __constant_cpu_to_le16(0x0800); + } + + acp.window = __constant_cpu_to_le16(0x0012); + + if (__cpu_to_le16(hdev->page_scan_interval) != acp.interval || + __cpu_to_le16(hdev->page_scan_window) != acp.window) + hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY, + sizeof(acp), &acp); + + if (hdev->page_scan_type != type) + hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); +} + +static void set_connectable_complete(struct hci_dev *hdev, u8 status) +{ + struct pending_cmd *cmd; + + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev); + if (!cmd) + goto unlock; + + send_settings_rsp(cmd->sk, MGMT_OP_SET_CONNECTABLE, hdev); + + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; struct pending_cmd *cmd; + struct hci_request req; u8 scan; int err; @@ -1065,7 +1124,20 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, cancel_delayed_work(&hdev->discov_off); } - err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + hci_req_init(&req, hdev); + + hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + + /* If we're going from non-connectable to connectable or + * vice-versa when fast connectable is enabled ensure that fast + * connectable gets disabled. write_fast_connectable won't do + * anything if the page scan parameters are already what they + * should be. + */ + if (cp->val || test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) + write_fast_connectable(&req, false); + + err = hci_req_run(&req, set_connectable_complete); if (err < 0) mgmt_pending_remove(cmd); @@ -1332,6 +1404,29 @@ unlock: return err; } +/* This is a helper function to test for pending mgmt commands that can + * cause CoD or EIR HCI commands. We can only allow one such pending + * mgmt command at a time since otherwise we cannot easily track what + * the current values are, will be, and based on that calculate if a new + * HCI command needs to be sent and if yes with what value. + */ +static bool pending_eir_or_class(struct hci_dev *hdev) +{ + struct pending_cmd *cmd; + + list_for_each_entry(cmd, &hdev->mgmt_pending, list) { + switch (cmd->opcode) { + case MGMT_OP_ADD_UUID: + case MGMT_OP_REMOVE_UUID: + case MGMT_OP_SET_DEV_CLASS: + case MGMT_OP_SET_POWERED: + return true; + } + } + + return false; +} + static const u8 bluetooth_base_uuid[] = { 0xfb, 0x34, 0x9b, 0x5f, 0x80, 0x00, 0x00, 0x80, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -1351,10 +1446,37 @@ static u8 get_uuid_size(const u8 *uuid) return 16; } +static void mgmt_class_complete(struct hci_dev *hdev, u16 mgmt_op, u8 status) +{ + struct pending_cmd *cmd; + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(mgmt_op, hdev); + if (!cmd) + goto unlock; + + cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(status), + hdev->dev_class, 3); + + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + +static void add_uuid_complete(struct hci_dev *hdev, u8 status) +{ + BT_DBG("status 0x%02x", status); + + mgmt_class_complete(hdev, MGMT_OP_ADD_UUID, status); +} + static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_uuid *cp = data; struct pending_cmd *cmd; + struct hci_request req; struct bt_uuid *uuid; int err; @@ -1362,7 +1484,7 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_dev_lock(hdev); - if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) { + if (pending_eir_or_class(hdev)) { err = cmd_status(sk, hdev->id, MGMT_OP_ADD_UUID, MGMT_STATUS_BUSY); goto failed; @@ -1380,23 +1502,28 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) list_add_tail(&uuid->list, &hdev->uuids); - err = update_class(hdev); - if (err < 0) - goto failed; + hci_req_init(&req, hdev); - err = update_eir(hdev); - if (err < 0) - goto failed; + update_class(&req); + update_eir(&req); + + err = hci_req_run(&req, add_uuid_complete); + if (err < 0) { + if (err != -ENODATA) + goto failed; - if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) { err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_UUID, 0, hdev->dev_class, 3); goto failed; } cmd = mgmt_pending_add(sk, MGMT_OP_ADD_UUID, hdev, data, len); - if (!cmd) + if (!cmd) { err = -ENOMEM; + goto failed; + } + + err = 0; failed: hci_dev_unlock(hdev); @@ -1417,6 +1544,13 @@ static bool enable_service_cache(struct hci_dev *hdev) return false; } +static void remove_uuid_complete(struct hci_dev *hdev, u8 status) +{ + BT_DBG("status 0x%02x", status); + + mgmt_class_complete(hdev, MGMT_OP_REMOVE_UUID, status); +} + static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -1424,13 +1558,14 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, struct pending_cmd *cmd; struct bt_uuid *match, *tmp; u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + struct hci_request req; int err, found; BT_DBG("request for %s", hdev->name); hci_dev_lock(hdev); - if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) { + if (pending_eir_or_class(hdev)) { err = cmd_status(sk, hdev->id, MGMT_OP_REMOVE_UUID, MGMT_STATUS_BUSY); goto unlock; @@ -1466,34 +1601,47 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, } update_class: - err = update_class(hdev); - if (err < 0) - goto unlock; + hci_req_init(&req, hdev); - err = update_eir(hdev); - if (err < 0) - goto unlock; + update_class(&req); + update_eir(&req); + + err = hci_req_run(&req, remove_uuid_complete); + if (err < 0) { + if (err != -ENODATA) + goto unlock; - if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) { err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_UUID, 0, hdev->dev_class, 3); goto unlock; } cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_UUID, hdev, data, len); - if (!cmd) + if (!cmd) { err = -ENOMEM; + goto unlock; + } + + err = 0; unlock: hci_dev_unlock(hdev); return err; } +static void set_class_complete(struct hci_dev *hdev, u8 status) +{ + BT_DBG("status 0x%02x", status); + + mgmt_class_complete(hdev, MGMT_OP_SET_DEV_CLASS, status); +} + static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_dev_class *cp = data; struct pending_cmd *cmd; + struct hci_request req; int err; BT_DBG("request for %s", hdev->name); @@ -1502,15 +1650,19 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, MGMT_STATUS_NOT_SUPPORTED); - if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) - return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, - MGMT_STATUS_BUSY); + hci_dev_lock(hdev); - if ((cp->minor & 0x03) != 0 || (cp->major & 0xe0) != 0) - return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, - MGMT_STATUS_INVALID_PARAMS); + if (pending_eir_or_class(hdev)) { + err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_BUSY); + goto unlock; + } - hci_dev_lock(hdev); + if ((cp->minor & 0x03) != 0 || (cp->major & 0xe0) != 0) { + err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_INVALID_PARAMS); + goto unlock; + } hdev->major_class = cp->major; hdev->minor_class = cp->minor; @@ -1521,26 +1673,34 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } + hci_req_init(&req, hdev); + if (test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) { hci_dev_unlock(hdev); cancel_delayed_work_sync(&hdev->service_cache); hci_dev_lock(hdev); - update_eir(hdev); + update_eir(&req); } - err = update_class(hdev); - if (err < 0) - goto unlock; + update_class(&req); + + err = hci_req_run(&req, set_class_complete); + if (err < 0) { + if (err != -ENODATA) + goto unlock; - if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) { err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, 0, hdev->dev_class, 3); goto unlock; } cmd = mgmt_pending_add(sk, MGMT_OP_SET_DEV_CLASS, hdev, data, len); - if (!cmd) + if (!cmd) { err = -ENOMEM; + goto unlock; + } + + err = 0; unlock: hci_dev_unlock(hdev); @@ -2140,7 +2300,7 @@ unlock: } static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev, - bdaddr_t *bdaddr, u8 type, u16 mgmt_op, + struct mgmt_addr_info *addr, u16 mgmt_op, u16 hci_op, __le32 passkey) { struct pending_cmd *cmd; @@ -2150,37 +2310,41 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_status(sk, hdev->id, mgmt_op, - MGMT_STATUS_NOT_POWERED); + err = cmd_complete(sk, hdev->id, mgmt_op, + MGMT_STATUS_NOT_POWERED, addr, + sizeof(*addr)); goto done; } - if (type == BDADDR_BREDR) - conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, bdaddr); + if (addr->type == BDADDR_BREDR) + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &addr->bdaddr); else - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, bdaddr); + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &addr->bdaddr); if (!conn) { - err = cmd_status(sk, hdev->id, mgmt_op, - MGMT_STATUS_NOT_CONNECTED); + err = cmd_complete(sk, hdev->id, mgmt_op, + MGMT_STATUS_NOT_CONNECTED, addr, + sizeof(*addr)); goto done; } - if (type == BDADDR_LE_PUBLIC || type == BDADDR_LE_RANDOM) { + if (addr->type == BDADDR_LE_PUBLIC || addr->type == BDADDR_LE_RANDOM) { /* Continue with pairing via SMP */ err = smp_user_confirm_reply(conn, mgmt_op, passkey); if (!err) - err = cmd_status(sk, hdev->id, mgmt_op, - MGMT_STATUS_SUCCESS); + err = cmd_complete(sk, hdev->id, mgmt_op, + MGMT_STATUS_SUCCESS, addr, + sizeof(*addr)); else - err = cmd_status(sk, hdev->id, mgmt_op, - MGMT_STATUS_FAILED); + err = cmd_complete(sk, hdev->id, mgmt_op, + MGMT_STATUS_FAILED, addr, + sizeof(*addr)); goto done; } - cmd = mgmt_pending_add(sk, mgmt_op, hdev, bdaddr, sizeof(*bdaddr)); + cmd = mgmt_pending_add(sk, mgmt_op, hdev, addr, sizeof(*addr)); if (!cmd) { err = -ENOMEM; goto done; @@ -2190,11 +2354,12 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev, if (hci_op == HCI_OP_USER_PASSKEY_REPLY) { struct hci_cp_user_passkey_reply cp; - bacpy(&cp.bdaddr, bdaddr); + bacpy(&cp.bdaddr, &addr->bdaddr); cp.passkey = passkey; err = hci_send_cmd(hdev, hci_op, sizeof(cp), &cp); } else - err = hci_send_cmd(hdev, hci_op, sizeof(*bdaddr), bdaddr); + err = hci_send_cmd(hdev, hci_op, sizeof(addr->bdaddr), + &addr->bdaddr); if (err < 0) mgmt_pending_remove(cmd); @@ -2211,7 +2376,7 @@ static int pin_code_neg_reply(struct sock *sk, struct hci_dev *hdev, BT_DBG(""); - return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type, + return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_PIN_CODE_NEG_REPLY, HCI_OP_PIN_CODE_NEG_REPLY, 0); } @@ -2227,7 +2392,7 @@ static int user_confirm_reply(struct sock *sk, struct hci_dev *hdev, void *data, return cmd_status(sk, hdev->id, MGMT_OP_USER_CONFIRM_REPLY, MGMT_STATUS_INVALID_PARAMS); - return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type, + return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_USER_CONFIRM_REPLY, HCI_OP_USER_CONFIRM_REPLY, 0); } @@ -2239,7 +2404,7 @@ static int user_confirm_neg_reply(struct sock *sk, struct hci_dev *hdev, BT_DBG(""); - return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type, + return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_USER_CONFIRM_NEG_REPLY, HCI_OP_USER_CONFIRM_NEG_REPLY, 0); } @@ -2251,7 +2416,7 @@ static int user_passkey_reply(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); - return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type, + return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_USER_PASSKEY_REPLY, HCI_OP_USER_PASSKEY_REPLY, cp->passkey); } @@ -2263,18 +2428,47 @@ static int user_passkey_neg_reply(struct sock *sk, struct hci_dev *hdev, BT_DBG(""); - return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type, + return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_USER_PASSKEY_NEG_REPLY, HCI_OP_USER_PASSKEY_NEG_REPLY, 0); } -static int update_name(struct hci_dev *hdev, const char *name) +static void update_name(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; struct hci_cp_write_local_name cp; - memcpy(cp.name, name, sizeof(cp.name)); + memcpy(cp.name, hdev->dev_name, sizeof(cp.name)); + + hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); +} + +static void set_name_complete(struct hci_dev *hdev, u8 status) +{ + struct mgmt_cp_set_local_name *cp; + struct pending_cmd *cmd; + + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, hdev); + if (!cmd) + goto unlock; + + cp = cmd->param; - return hci_send_cmd(hdev, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); + if (status) + cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, + mgmt_status(status)); + else + cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, + cp, sizeof(*cp)); + + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); } static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, @@ -2282,12 +2476,24 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_set_local_name *cp = data; struct pending_cmd *cmd; + struct hci_request req; int err; BT_DBG(""); hci_dev_lock(hdev); + /* If the old values are the same as the new ones just return a + * direct command complete event. + */ + if (!memcmp(hdev->dev_name, cp->name, sizeof(hdev->dev_name)) && + !memcmp(hdev->short_name, cp->short_name, + sizeof(hdev->short_name))) { + err = cmd_complete(sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, + data, len); + goto failed; + } + memcpy(hdev->short_name, cp->short_name, sizeof(hdev->short_name)); if (!hdev_is_powered(hdev)) { @@ -2310,7 +2516,19 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - err = update_name(hdev, cp->name); + memcpy(hdev->dev_name, cp->name, sizeof(hdev->dev_name)); + + hci_req_init(&req, hdev); + + if (lmp_bredr_capable(hdev)) { + update_name(&req); + update_eir(&req); + } + + if (lmp_le_capable(hdev)) + hci_update_ad(&req); + + err = hci_req_run(&req, set_name_complete); if (err < 0) mgmt_pending_remove(cmd); @@ -2698,6 +2916,7 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_device_id *cp = data; + struct hci_request req; int err; __u16 source; @@ -2718,24 +2937,59 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEVICE_ID, 0, NULL, 0); - update_eir(hdev); + hci_req_init(&req, hdev); + update_eir(&req); + hci_req_run(&req, NULL); hci_dev_unlock(hdev); return err; } +static void fast_connectable_complete(struct hci_dev *hdev, u8 status) +{ + struct pending_cmd *cmd; + + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev); + if (!cmd) + goto unlock; + + if (status) { + cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, + mgmt_status(status)); + } else { + struct mgmt_mode *cp = cmd->param; + + if (cp->val) + set_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags); + else + clear_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags); + + send_settings_rsp(cmd->sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev); + new_settings(hdev, cmd->sk); + } + + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; - struct hci_cp_write_page_scan_activity acp; - u8 type; + struct pending_cmd *cmd; + struct hci_request req; int err; BT_DBG("%s", hdev->name); - if (!lmp_bredr_capable(hdev)) + if (!lmp_bredr_capable(hdev) || hdev->hci_ver < BLUETOOTH_VER_1_2) return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, MGMT_STATUS_NOT_SUPPORTED); @@ -2753,40 +3007,39 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); - if (cp->val) { - type = PAGE_SCAN_TYPE_INTERLACED; + if (mgmt_pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev)) { + err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, + MGMT_STATUS_BUSY); + goto unlock; + } - /* 160 msec page scan interval */ - acp.interval = __constant_cpu_to_le16(0x0100); - } else { - type = PAGE_SCAN_TYPE_STANDARD; /* default */ + if (!!cp->val == test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) { + err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE, + hdev); + goto unlock; + } - /* default 1.28 sec page scan */ - acp.interval = __constant_cpu_to_le16(0x0800); + cmd = mgmt_pending_add(sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev, + data, len); + if (!cmd) { + err = -ENOMEM; + goto unlock; } - /* default 11.25 msec page scan window */ - acp.window = __constant_cpu_to_le16(0x0012); + hci_req_init(&req, hdev); - err = hci_send_cmd(hdev, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY, sizeof(acp), - &acp); - if (err < 0) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, - MGMT_STATUS_FAILED); - goto done; - } + write_fast_connectable(&req, cp->val); - err = hci_send_cmd(hdev, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); + err = hci_req_run(&req, fast_connectable_complete); if (err < 0) { err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, MGMT_STATUS_FAILED); - goto done; + mgmt_pending_remove(cmd); } - err = cmd_complete(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, 0, - NULL, 0); -done: +unlock: hci_dev_unlock(hdev); + return err; } @@ -3043,79 +3296,115 @@ static void settings_rsp(struct pending_cmd *cmd, void *data) mgmt_pending_free(cmd); } -static int set_bredr_scan(struct hci_dev *hdev) +static void set_bredr_scan(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; u8 scan = 0; + /* Ensure that fast connectable is disabled. This function will + * not do anything if the page scan parameters are already what + * they should be. + */ + write_fast_connectable(req, false); + if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) scan |= SCAN_PAGE; if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) scan |= SCAN_INQUIRY; - if (!scan) - return 0; - - return hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + if (scan) + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } -int mgmt_powered(struct hci_dev *hdev, u8 powered) +static void powered_complete(struct hci_dev *hdev, u8 status) { struct cmd_lookup match = { NULL, hdev }; - int err; - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) - return 0; + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); - if (powered) { - u8 link_sec; + new_settings(hdev, match.sk); - if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && - !lmp_host_ssp_capable(hdev)) { - u8 ssp = 1; + hci_dev_unlock(hdev); - hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, 1, &ssp); - } + if (match.sk) + sock_put(match.sk); +} - if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { - struct hci_cp_write_le_host_supported cp; +static int powered_update_hci(struct hci_dev *hdev) +{ + struct hci_request req; + u8 link_sec; - cp.le = 1; - cp.simul = lmp_le_br_capable(hdev); + hci_req_init(&req, hdev); - /* Check first if we already have the right - * host state (host features set) - */ - if (cp.le != lmp_host_le_capable(hdev) || - cp.simul != lmp_host_le_br_capable(hdev)) - hci_send_cmd(hdev, - HCI_OP_WRITE_LE_HOST_SUPPORTED, - sizeof(cp), &cp); - } + if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && + !lmp_host_ssp_capable(hdev)) { + u8 ssp = 1; - link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags); - if (link_sec != test_bit(HCI_AUTH, &hdev->flags)) - hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, - sizeof(link_sec), &link_sec); + hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, 1, &ssp); + } - if (lmp_bredr_capable(hdev)) { - set_bredr_scan(hdev); - update_class(hdev); - update_name(hdev, hdev->dev_name); - update_eir(hdev); - } - } else { - u8 status = MGMT_STATUS_NOT_POWERED; - u8 zero_cod[] = { 0, 0, 0 }; + if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { + struct hci_cp_write_le_host_supported cp; - mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status); + cp.le = 1; + cp.simul = lmp_le_br_capable(hdev); - if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) - mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, - zero_cod, sizeof(zero_cod), NULL); + /* Check first if we already have the right + * host state (host features set) + */ + if (cp.le != lmp_host_le_capable(hdev) || + cp.simul != lmp_host_le_br_capable(hdev)) + hci_req_add(&req, HCI_OP_WRITE_LE_HOST_SUPPORTED, + sizeof(cp), &cp); } + link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags); + if (link_sec != test_bit(HCI_AUTH, &hdev->flags)) + hci_req_add(&req, HCI_OP_WRITE_AUTH_ENABLE, + sizeof(link_sec), &link_sec); + + if (lmp_bredr_capable(hdev)) { + set_bredr_scan(&req); + update_class(&req); + update_name(&req); + update_eir(&req); + } + + return hci_req_run(&req, powered_complete); +} + +int mgmt_powered(struct hci_dev *hdev, u8 powered) +{ + struct cmd_lookup match = { NULL, hdev }; + u8 status_not_powered = MGMT_STATUS_NOT_POWERED; + u8 zero_cod[] = { 0, 0, 0 }; + int err; + + if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + return 0; + + if (powered) { + if (powered_update_hci(hdev) == 0) + return 0; + + mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, + &match); + goto new_settings; + } + + mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); + mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status_not_powered); + + if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) + mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, + zero_cod, sizeof(zero_cod), NULL); + +new_settings: err = new_settings(hdev, match.sk); if (match.sk) @@ -3152,7 +3441,7 @@ int mgmt_discoverable(struct hci_dev *hdev, u8 discoverable) int mgmt_connectable(struct hci_dev *hdev, u8 connectable) { - struct cmd_lookup match = { NULL, hdev }; + struct pending_cmd *cmd; bool changed = false; int err = 0; @@ -3164,14 +3453,10 @@ int mgmt_connectable(struct hci_dev *hdev, u8 connectable) changed = true; } - mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, hdev, settings_rsp, - &match); + cmd = mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev); if (changed) - err = new_settings(hdev, match.sk); - - if (match.sk) - sock_put(match.sk); + err = new_settings(hdev, cmd ? cmd->sk : NULL); return err; } @@ -3555,23 +3840,25 @@ int mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status) return err; } -static int clear_eir(struct hci_dev *hdev) +static void clear_eir(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; struct hci_cp_write_eir cp; if (!lmp_ext_inq_capable(hdev)) - return 0; + return; memset(hdev->eir, 0, sizeof(hdev->eir)); memset(&cp, 0, sizeof(cp)); - return hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp); } int mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) { struct cmd_lookup match = { NULL, hdev }; + struct hci_request req; bool changed = false; int err = 0; @@ -3604,29 +3891,26 @@ int mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) if (match.sk) sock_put(match.sk); + hci_req_init(&req, hdev); + if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) - update_eir(hdev); + update_eir(&req); else - clear_eir(hdev); + clear_eir(&req); + + hci_req_run(&req, NULL); return err; } -static void class_rsp(struct pending_cmd *cmd, void *data) +static void sk_lookup(struct pending_cmd *cmd, void *data) { struct cmd_lookup *match = data; - cmd_complete(cmd->sk, cmd->index, cmd->opcode, match->mgmt_status, - match->hdev->dev_class, 3); - - list_del(&cmd->list); - if (match->sk == NULL) { match->sk = cmd->sk; sock_hold(match->sk); } - - mgmt_pending_free(cmd); } int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, @@ -3635,11 +3919,9 @@ int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, struct cmd_lookup match = { NULL, hdev, mgmt_status(status) }; int err = 0; - clear_bit(HCI_PENDING_CLASS, &hdev->dev_flags); - - mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, class_rsp, &match); - mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, class_rsp, &match); - mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, class_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, sk_lookup, &match); + mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match); + mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); if (!status) err = mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, @@ -3653,55 +3935,29 @@ int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, int mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status) { - struct pending_cmd *cmd; struct mgmt_cp_set_local_name ev; - bool changed = false; - int err = 0; + struct pending_cmd *cmd; - if (memcmp(name, hdev->dev_name, sizeof(hdev->dev_name)) != 0) { - memcpy(hdev->dev_name, name, sizeof(hdev->dev_name)); - changed = true; - } + if (status) + return 0; memset(&ev, 0, sizeof(ev)); memcpy(ev.name, name, HCI_MAX_NAME_LENGTH); memcpy(ev.short_name, hdev->short_name, HCI_MAX_SHORT_NAME_LENGTH); cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, hdev); - if (!cmd) - goto send_event; - - /* Always assume that either the short or the complete name has - * changed if there was a pending mgmt command */ - changed = true; + if (!cmd) { + memcpy(hdev->dev_name, name, sizeof(hdev->dev_name)); - if (status) { - err = cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, - mgmt_status(status)); - goto failed; + /* If this is a HCI command related to powering on the + * HCI dev don't send any mgmt signals. + */ + if (mgmt_pending_find(MGMT_OP_SET_POWERED, hdev)) + return 0; } - err = cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, &ev, - sizeof(ev)); - if (err < 0) - goto failed; - -send_event: - if (changed) - err = mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, - sizeof(ev), cmd ? cmd->sk : NULL); - - /* EIR is taken care of separately when powering on the - * adapter so only update them here if this is a name change - * unrelated to power on. - */ - if (!test_bit(HCI_INIT, &hdev->flags)) - update_eir(hdev); - -failed: - if (cmd) - mgmt_pending_remove(cmd); - return err; + return mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), + cmd ? cmd->sk : NULL); } int mgmt_read_local_oob_data_reply_complete(struct hci_dev *hdev, u8 *hash, diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index b23e2713fea8..ca957d34b0c8 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -69,7 +69,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, u8 sec_level, int *err); static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst); -static void rfcomm_session_del(struct rfcomm_session *s); +static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s); /* ---- RFCOMM frame parsing macros ---- */ #define __get_dlci(b) ((b & 0xfc) >> 2) @@ -108,12 +108,6 @@ static void rfcomm_schedule(void) wake_up_process(rfcomm_thread); } -static void rfcomm_session_put(struct rfcomm_session *s) -{ - if (atomic_dec_and_test(&s->refcnt)) - rfcomm_session_del(s); -} - /* ---- RFCOMM FCS computation ---- */ /* reversed, 8-bit, poly=0x07 */ @@ -249,16 +243,14 @@ static void rfcomm_session_set_timer(struct rfcomm_session *s, long timeout) { BT_DBG("session %p state %ld timeout %ld", s, s->state, timeout); - if (!mod_timer(&s->timer, jiffies + timeout)) - rfcomm_session_hold(s); + mod_timer(&s->timer, jiffies + timeout); } static void rfcomm_session_clear_timer(struct rfcomm_session *s) { BT_DBG("session %p state %ld", s, s->state); - if (del_timer(&s->timer)) - rfcomm_session_put(s); + del_timer_sync(&s->timer); } /* ---- RFCOMM DLCs ---- */ @@ -336,8 +328,6 @@ static void rfcomm_dlc_link(struct rfcomm_session *s, struct rfcomm_dlc *d) { BT_DBG("dlc %p session %p", d, s); - rfcomm_session_hold(s); - rfcomm_session_clear_timer(s); rfcomm_dlc_hold(d); list_add(&d->list, &s->dlcs); @@ -356,8 +346,6 @@ static void rfcomm_dlc_unlink(struct rfcomm_dlc *d) if (list_empty(&s->dlcs)) rfcomm_session_set_timer(s, RFCOMM_IDLE_TIMEOUT); - - rfcomm_session_put(s); } static struct rfcomm_dlc *rfcomm_dlc_get(struct rfcomm_session *s, u8 dlci) @@ -493,12 +481,34 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err) int rfcomm_dlc_close(struct rfcomm_dlc *d, int err) { - int r; + int r = 0; + struct rfcomm_dlc *d_list; + struct rfcomm_session *s, *s_list; + + BT_DBG("dlc %p state %ld dlci %d err %d", d, d->state, d->dlci, err); rfcomm_lock(); - r = __rfcomm_dlc_close(d, err); + s = d->session; + if (!s) + goto no_session; + + /* after waiting on the mutex check the session still exists + * then check the dlc still exists + */ + list_for_each_entry(s_list, &session_list, list) { + if (s_list == s) { + list_for_each_entry(d_list, &s->dlcs, list) { + if (d_list == d) { + r = __rfcomm_dlc_close(d, err); + break; + } + } + break; + } + } +no_session: rfcomm_unlock(); return r; } @@ -609,7 +619,7 @@ static struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state) return s; } -static void rfcomm_session_del(struct rfcomm_session *s) +static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s) { int state = s->state; @@ -617,15 +627,14 @@ static void rfcomm_session_del(struct rfcomm_session *s) list_del(&s->list); - if (state == BT_CONNECTED) - rfcomm_send_disc(s, 0); - rfcomm_session_clear_timer(s); sock_release(s->sock); kfree(s); if (state != BT_LISTEN) module_put(THIS_MODULE); + + return NULL; } static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst) @@ -644,17 +653,16 @@ static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst) return NULL; } -static void rfcomm_session_close(struct rfcomm_session *s, int err) +static struct rfcomm_session *rfcomm_session_close(struct rfcomm_session *s, + int err) { struct rfcomm_dlc *d; struct list_head *p, *n; - BT_DBG("session %p state %ld err %d", s, s->state, err); - - rfcomm_session_hold(s); - s->state = BT_CLOSED; + BT_DBG("session %p state %ld err %d", s, s->state, err); + /* Close all dlcs */ list_for_each_safe(p, n, &s->dlcs) { d = list_entry(p, struct rfcomm_dlc, list); @@ -663,7 +671,7 @@ static void rfcomm_session_close(struct rfcomm_session *s, int err) } rfcomm_session_clear_timer(s); - rfcomm_session_put(s); + return rfcomm_session_del(s); } static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, @@ -715,8 +723,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, if (*err == 0 || *err == -EINPROGRESS) return s; - rfcomm_session_del(s); - return NULL; + return rfcomm_session_del(s); failed: sock_release(sock); @@ -1105,7 +1112,7 @@ static void rfcomm_make_uih(struct sk_buff *skb, u8 addr) } /* ---- RFCOMM frame reception ---- */ -static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) +static struct rfcomm_session *rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) { BT_DBG("session %p state %ld dlci %d", s, s->state, dlci); @@ -1114,7 +1121,7 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) struct rfcomm_dlc *d = rfcomm_dlc_get(s, dlci); if (!d) { rfcomm_send_dm(s, dlci); - return 0; + return s; } switch (d->state) { @@ -1150,25 +1157,14 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) break; case BT_DISCONN: - /* rfcomm_session_put is called later so don't do - * anything here otherwise we will mess up the session - * reference counter: - * - * (a) when we are the initiator dlc_unlink will drive - * the reference counter to 0 (there is no initial put - * after session_add) - * - * (b) when we are not the initiator rfcomm_rx_process - * will explicitly call put to balance the initial hold - * done after session add. - */ + s = rfcomm_session_close(s, ECONNRESET); break; } } - return 0; + return s; } -static int rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci) +static struct rfcomm_session *rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci) { int err = 0; @@ -1192,13 +1188,13 @@ static int rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci) else err = ECONNRESET; - s->state = BT_CLOSED; - rfcomm_session_close(s, err); + s = rfcomm_session_close(s, err); } - return 0; + return s; } -static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci) +static struct rfcomm_session *rfcomm_recv_disc(struct rfcomm_session *s, + u8 dlci) { int err = 0; @@ -1227,11 +1223,9 @@ static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci) else err = ECONNRESET; - s->state = BT_CLOSED; - rfcomm_session_close(s, err); + s = rfcomm_session_close(s, err); } - - return 0; + return s; } void rfcomm_dlc_accept(struct rfcomm_dlc *d) @@ -1652,11 +1646,18 @@ drop: return 0; } -static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb) +static struct rfcomm_session *rfcomm_recv_frame(struct rfcomm_session *s, + struct sk_buff *skb) { struct rfcomm_hdr *hdr = (void *) skb->data; u8 type, dlci, fcs; + if (!s) { + /* no session, so free socket data */ + kfree_skb(skb); + return s; + } + dlci = __get_dlci(hdr->addr); type = __get_type(hdr->ctrl); @@ -1667,7 +1668,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb) if (__check_fcs(skb->data, type, fcs)) { BT_ERR("bad checksum in packet"); kfree_skb(skb); - return -EILSEQ; + return s; } if (__test_ea(hdr->len)) @@ -1683,22 +1684,23 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb) case RFCOMM_DISC: if (__test_pf(hdr->ctrl)) - rfcomm_recv_disc(s, dlci); + s = rfcomm_recv_disc(s, dlci); break; case RFCOMM_UA: if (__test_pf(hdr->ctrl)) - rfcomm_recv_ua(s, dlci); + s = rfcomm_recv_ua(s, dlci); break; case RFCOMM_DM: - rfcomm_recv_dm(s, dlci); + s = rfcomm_recv_dm(s, dlci); break; case RFCOMM_UIH: - if (dlci) - return rfcomm_recv_data(s, dlci, __test_pf(hdr->ctrl), skb); - + if (dlci) { + rfcomm_recv_data(s, dlci, __test_pf(hdr->ctrl), skb); + return s; + } rfcomm_recv_mcc(s, skb); break; @@ -1707,7 +1709,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb) break; } kfree_skb(skb); - return 0; + return s; } /* ---- Connection and data processing ---- */ @@ -1844,7 +1846,7 @@ static void rfcomm_process_dlcs(struct rfcomm_session *s) } } -static void rfcomm_process_rx(struct rfcomm_session *s) +static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s) { struct socket *sock = s->sock; struct sock *sk = sock->sk; @@ -1856,17 +1858,15 @@ static void rfcomm_process_rx(struct rfcomm_session *s) while ((skb = skb_dequeue(&sk->sk_receive_queue))) { skb_orphan(skb); if (!skb_linearize(skb)) - rfcomm_recv_frame(s, skb); + s = rfcomm_recv_frame(s, skb); else kfree_skb(skb); } - if (sk->sk_state == BT_CLOSED) { - if (!s->initiator) - rfcomm_session_put(s); + if (s && (sk->sk_state == BT_CLOSED)) + s = rfcomm_session_close(s, sk->sk_err); - rfcomm_session_close(s, sk->sk_err); - } + return s; } static void rfcomm_accept_connection(struct rfcomm_session *s) @@ -1891,8 +1891,6 @@ static void rfcomm_accept_connection(struct rfcomm_session *s) s = rfcomm_session_add(nsock, BT_OPEN); if (s) { - rfcomm_session_hold(s); - /* We should adjust MTU on incoming sessions. * L2CAP MTU minus UIH header and FCS. */ s->mtu = min(l2cap_pi(nsock->sk)->chan->omtu, @@ -1903,7 +1901,7 @@ static void rfcomm_accept_connection(struct rfcomm_session *s) sock_release(nsock); } -static void rfcomm_check_connection(struct rfcomm_session *s) +static struct rfcomm_session *rfcomm_check_connection(struct rfcomm_session *s) { struct sock *sk = s->sock->sk; @@ -1921,10 +1919,10 @@ static void rfcomm_check_connection(struct rfcomm_session *s) break; case BT_CLOSED: - s->state = BT_CLOSED; - rfcomm_session_close(s, sk->sk_err); + s = rfcomm_session_close(s, sk->sk_err); break; } + return s; } static void rfcomm_process_sessions(void) @@ -1940,7 +1938,6 @@ static void rfcomm_process_sessions(void) if (test_and_clear_bit(RFCOMM_TIMED_OUT, &s->flags)) { s->state = BT_DISCONN; rfcomm_send_disc(s, 0); - rfcomm_session_put(s); continue; } @@ -1949,21 +1946,18 @@ static void rfcomm_process_sessions(void) continue; } - rfcomm_session_hold(s); - switch (s->state) { case BT_BOUND: - rfcomm_check_connection(s); + s = rfcomm_check_connection(s); break; default: - rfcomm_process_rx(s); + s = rfcomm_process_rx(s); break; } - rfcomm_process_dlcs(s); - - rfcomm_session_put(s); + if (s) + rfcomm_process_dlcs(s); } rfcomm_unlock(); @@ -2010,10 +2004,11 @@ static int rfcomm_add_listener(bdaddr_t *ba) /* Add listening session */ s = rfcomm_session_add(sock, BT_LISTEN); - if (!s) + if (!s) { + err = -ENOMEM; goto failed; + } - rfcomm_session_hold(s); return 0; failed: sock_release(sock); @@ -2071,8 +2066,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) if (!s) return; - rfcomm_session_hold(s); - list_for_each_safe(p, n, &s->dlcs) { d = list_entry(p, struct rfcomm_dlc, list); @@ -2104,8 +2097,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) set_bit(RFCOMM_AUTH_REJECT, &d->flags); } - rfcomm_session_put(s); - rfcomm_schedule(); } diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index c23bae86263b..a8638b58c4bf 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -608,6 +608,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { rfcomm_dlc_accept(d); + msg->msg_namelen = 0; return 0; } @@ -1065,8 +1066,7 @@ void __exit rfcomm_cleanup_sockets(void) debugfs_remove(rfcomm_sock_debugfs); - if (bt_sock_unregister(BTPROTO_RFCOMM) < 0) - BT_ERR("RFCOMM socket layer unregistration failed"); + bt_sock_unregister(BTPROTO_RFCOMM); proto_unregister(&rfcomm_proto); } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index fad0302bdb32..2c8055350510 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -665,6 +665,7 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { hci_conn_accept(pi->conn->hcon, 0); sk->sk_state = BT_CONFIG; + msg->msg_namelen = 0; release_sock(sk); return 0; @@ -1112,8 +1113,7 @@ void __exit sco_exit(void) debugfs_remove(sco_debugfs); - if (bt_sock_unregister(BTPROTO_SCO) < 0) - BT_ERR("SCO socket unregistration failed"); + bt_sock_unregister(BTPROTO_SCO); proto_unregister(&sco_proto); } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ef1b91431c6b..f17fcb3097c2 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -148,7 +148,6 @@ static void del_nbp(struct net_bridge_port *p) dev->priv_flags &= ~IFF_BRIDGE_PORT; netdev_rx_handler_unregister(dev); - synchronize_net(); netdev_upper_dev_unlink(dev, br->dev); diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 92de5e5f9db2..9878eb8204c5 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -78,6 +78,11 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum, const char *prefix) { unsigned int bitmask; + struct net *net = dev_net(in ? in : out); + + /* FIXME: Disabled from containers until syslog ns is supported */ + if (!net_eq(net, &init_net)) + return; spin_lock_bh(&ebt_log_lock); printk(KERN_SOH "%c%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x", @@ -176,17 +181,18 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_log_info *info = par->targinfo; struct nf_loginfo li; + struct net *net = dev_net(par->in ? par->in : par->out); li.type = NF_LOG_TYPE_LOG; li.u.log.level = info->loglevel; li.u.log.logflags = info->bitmask; if (info->bitmask & EBT_LOG_NFLOG) - nf_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, - par->out, &li, "%s", info->prefix); + nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, + par->in, par->out, &li, "%s", info->prefix); else ebt_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, - par->out, &li, info->prefix); + par->out, &li, info->prefix); return EBT_CONTINUE; } @@ -206,19 +212,47 @@ static struct nf_logger ebt_log_logger __read_mostly = { .me = THIS_MODULE, }; +static int __net_init ebt_log_net_init(struct net *net) +{ + nf_log_set(net, NFPROTO_BRIDGE, &ebt_log_logger); + return 0; +} + +static void __net_exit ebt_log_net_fini(struct net *net) +{ + nf_log_unset(net, &ebt_log_logger); +} + +static struct pernet_operations ebt_log_net_ops = { + .init = ebt_log_net_init, + .exit = ebt_log_net_fini, +}; + static int __init ebt_log_init(void) { int ret; + ret = register_pernet_subsys(&ebt_log_net_ops); + if (ret < 0) + goto err_pernet; + ret = xt_register_target(&ebt_log_tg_reg); if (ret < 0) - return ret; + goto err_target; + nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger); - return 0; + + return ret; + +err_target: + unregister_pernet_subsys(&ebt_log_net_ops); +err_pernet: + return ret; } static void __exit ebt_log_fini(void) { + unregister_pernet_subsys(&ebt_log_net_ops); nf_log_unregister(&ebt_log_logger); xt_unregister_target(&ebt_log_tg_reg); } diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c index 5be68bbcc341..59ac7952010d 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -24,14 +24,15 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nflog_info *info = par->targinfo; struct nf_loginfo li; + struct net *net = dev_net(par->in ? par->in : par->out); li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nf_log_packet(PF_BRIDGE, par->hooknum, skb, par->in, par->out, - &li, "%s", info->prefix); + nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in, + par->out, &li, "%s", info->prefix); return EBT_CONTINUE; } diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 961d870ab283..fc1905c51417 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -41,6 +41,7 @@ #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_ulog.h> #include <net/netfilter/nf_log.h> +#include <net/netns/generic.h> #include <net/sock.h> #include "../br_private.h" @@ -62,13 +63,22 @@ typedef struct { spinlock_t lock; /* the per-queue lock */ } ebt_ulog_buff_t; -static ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS]; -static struct sock *ebtulognl; +static int ebt_ulog_net_id __read_mostly; +struct ebt_ulog_net { + unsigned int nlgroup[EBT_ULOG_MAXNLGROUPS]; + ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS]; + struct sock *ebtulognl; +}; + +static struct ebt_ulog_net *ebt_ulog_pernet(struct net *net) +{ + return net_generic(net, ebt_ulog_net_id); +} /* send one ulog_buff_t to userspace */ -static void ulog_send(unsigned int nlgroup) +static void ulog_send(struct ebt_ulog_net *ebt, unsigned int nlgroup) { - ebt_ulog_buff_t *ub = &ulog_buffers[nlgroup]; + ebt_ulog_buff_t *ub = &ebt->ulog_buffers[nlgroup]; del_timer(&ub->timer); @@ -80,7 +90,7 @@ static void ulog_send(unsigned int nlgroup) ub->lastnlh->nlmsg_type = NLMSG_DONE; NETLINK_CB(ub->skb).dst_group = nlgroup + 1; - netlink_broadcast(ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC); + netlink_broadcast(ebt->ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC); ub->qlen = 0; ub->skb = NULL; @@ -89,10 +99,15 @@ static void ulog_send(unsigned int nlgroup) /* timer function to flush queue in flushtimeout time */ static void ulog_timer(unsigned long data) { - spin_lock_bh(&ulog_buffers[data].lock); - if (ulog_buffers[data].skb) - ulog_send(data); - spin_unlock_bh(&ulog_buffers[data].lock); + struct ebt_ulog_net *ebt = container_of((void *)data, + struct ebt_ulog_net, + nlgroup[*(unsigned int *)data]); + + ebt_ulog_buff_t *ub = &ebt->ulog_buffers[*(unsigned int *)data]; + spin_lock_bh(&ub->lock); + if (ub->skb) + ulog_send(ebt, *(unsigned int *)data); + spin_unlock_bh(&ub->lock); } static struct sk_buff *ulog_alloc_skb(unsigned int size) @@ -123,8 +138,10 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, ebt_ulog_packet_msg_t *pm; size_t size, copy_len; struct nlmsghdr *nlh; + struct net *net = dev_net(in ? in : out); + struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); unsigned int group = uloginfo->nlgroup; - ebt_ulog_buff_t *ub = &ulog_buffers[group]; + ebt_ulog_buff_t *ub = &ebt->ulog_buffers[group]; spinlock_t *lock = &ub->lock; ktime_t kt; @@ -146,7 +163,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, if (!(ub->skb = ulog_alloc_skb(size))) goto unlock; } else if (size > skb_tailroom(ub->skb)) { - ulog_send(group); + ulog_send(ebt, group); if (!(ub->skb = ulog_alloc_skb(size))) goto unlock; @@ -205,7 +222,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, ub->lastnlh = nlh; if (ub->qlen >= uloginfo->qthreshold) - ulog_send(group); + ulog_send(ebt, group); else if (!timer_pending(&ub->timer)) { ub->timer.expires = jiffies + flushtimeout * HZ / 100; add_timer(&ub->timer); @@ -277,47 +294,39 @@ static struct nf_logger ebt_ulog_logger __read_mostly = { .me = THIS_MODULE, }; -static int __init ebt_ulog_init(void) +static int __net_init ebt_ulog_net_init(struct net *net) { - int ret; int i; + struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); + struct netlink_kernel_cfg cfg = { .groups = EBT_ULOG_MAXNLGROUPS, }; - if (nlbufsiz >= 128*1024) { - pr_warning("Netlink buffer has to be <= 128kB," - " please try a smaller nlbufsiz parameter.\n"); - return -EINVAL; - } - /* initialize ulog_buffers */ for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { - setup_timer(&ulog_buffers[i].timer, ulog_timer, i); - spin_lock_init(&ulog_buffers[i].lock); + ebt->nlgroup[i] = i; + setup_timer(&ebt->ulog_buffers[i].timer, ulog_timer, + (unsigned long)&ebt->nlgroup[i]); + spin_lock_init(&ebt->ulog_buffers[i].lock); } - ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg); - if (!ebtulognl) - ret = -ENOMEM; - else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0) - netlink_kernel_release(ebtulognl); + ebt->ebtulognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg); + if (!ebt->ebtulognl) + return -ENOMEM; - if (ret == 0) - nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); - - return ret; + nf_log_set(net, NFPROTO_BRIDGE, &ebt_ulog_logger); + return 0; } -static void __exit ebt_ulog_fini(void) +static void __net_exit ebt_ulog_net_fini(struct net *net) { - ebt_ulog_buff_t *ub; int i; + struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); - nf_log_unregister(&ebt_ulog_logger); - xt_unregister_target(&ebt_ulog_tg_reg); + nf_log_unset(net, &ebt_ulog_logger); for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { - ub = &ulog_buffers[i]; + ebt_ulog_buff_t *ub = &ebt->ulog_buffers[i]; del_timer(&ub->timer); if (ub->skb) { @@ -325,7 +334,49 @@ static void __exit ebt_ulog_fini(void) ub->skb = NULL; } } - netlink_kernel_release(ebtulognl); + netlink_kernel_release(ebt->ebtulognl); +} + +static struct pernet_operations ebt_ulog_net_ops = { + .init = ebt_ulog_net_init, + .exit = ebt_ulog_net_fini, + .id = &ebt_ulog_net_id, + .size = sizeof(struct ebt_ulog_net), +}; + +static int __init ebt_ulog_init(void) +{ + int ret; + + if (nlbufsiz >= 128*1024) { + pr_warn("Netlink buffer has to be <= 128kB," + "please try a smaller nlbufsiz parameter.\n"); + return -EINVAL; + } + + ret = register_pernet_subsys(&ebt_ulog_net_ops); + if (ret) + goto out_pernet; + + ret = xt_register_target(&ebt_ulog_tg_reg); + if (ret) + goto out_target; + + nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); + + return 0; + +out_target: + unregister_pernet_subsys(&ebt_ulog_net_ops); +out_pernet: + return ret; +} + +static void __exit ebt_ulog_fini(void) +{ + nf_log_unregister(&ebt_ulog_logger); + xt_unregister_target(&ebt_ulog_tg_reg); + unregister_pernet_subsys(&ebt_ulog_net_ops); } module_init(ebt_ulog_init); diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 1d337e02bc63..630b8be6e748 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -286,6 +286,8 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, if (m->msg_flags&MSG_OOB) goto read_error; + m->msg_namelen = 0; + skb = skb_recv_datagram(sk, flags, 0 , &ret); if (!skb) goto read_error; diff --git a/net/can/raw.c b/net/can/raw.c index c1764e41ddaf..1085e65f848e 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -711,9 +711,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err < 0) goto free_skb; - err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (err < 0) - goto free_skb; + + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); skb->dev = dev; skb->sk = sk; diff --git a/net/core/datagram.c b/net/core/datagram.c index 368f9c3f9dc6..ebba65d7e0da 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -749,7 +749,9 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); + if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) diff --git a/net/core/dev.c b/net/core/dev.c index 2db88dfa99d7..3655ff927315 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1624,7 +1624,6 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) } skb_orphan(skb); - nf_reset(skb); if (unlikely(!is_skb_forwardable(dev, skb))) { atomic_long_inc(&dev->rx_dropped); @@ -1640,6 +1639,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) skb->mark = 0; secpath_reset(skb); nf_reset(skb); + nf_reset_trace(skb); return netif_rx(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); @@ -3318,6 +3318,7 @@ int netdev_rx_handler_register(struct net_device *dev, if (dev->rx_handler) return -EBUSY; + /* Note: rx_handler_data must be set before rx_handler */ rcu_assign_pointer(dev->rx_handler_data, rx_handler_data); rcu_assign_pointer(dev->rx_handler, rx_handler); @@ -3338,6 +3339,11 @@ void netdev_rx_handler_unregister(struct net_device *dev) ASSERT_RTNL(); RCU_INIT_POINTER(dev->rx_handler, NULL); + /* a reader seeing a non NULL rx_handler in a rcu_read_lock() + * section has a guarantee to see a non NULL rx_handler_data + * as well. + */ + synchronize_net(); RCU_INIT_POINTER(dev->rx_handler_data, NULL); } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index bd2eb9d3e369..c013f38482a1 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -22,7 +22,8 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, - unsigned char addr_type, bool global) + unsigned char addr_type, bool global, + bool sync) { struct netdev_hw_addr *ha; int alloc_size; @@ -37,7 +38,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, ha->type = addr_type; ha->refcount = 1; ha->global_use = global; - ha->synced = false; + ha->synced = sync; list_add_tail_rcu(&ha->list, &list->list); list->count++; @@ -46,7 +47,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, - unsigned char addr_type, bool global) + unsigned char addr_type, bool global, bool sync) { struct netdev_hw_addr *ha; @@ -63,43 +64,62 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, else ha->global_use = true; } + if (sync) { + if (ha->synced) + return 0; + else + ha->synced = true; + } ha->refcount++; return 0; } } - return __hw_addr_create_ex(list, addr, addr_len, addr_type, global); + return __hw_addr_create_ex(list, addr, addr_len, addr_type, global, + sync); } static int __hw_addr_add(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { - return __hw_addr_add_ex(list, addr, addr_len, addr_type, false); + return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false); +} + +static int __hw_addr_del_entry(struct netdev_hw_addr_list *list, + struct netdev_hw_addr *ha, bool global, + bool sync) +{ + if (global && !ha->global_use) + return -ENOENT; + + if (sync && !ha->synced) + return -ENOENT; + + if (global) + ha->global_use = false; + + if (sync) + ha->synced = false; + + if (--ha->refcount) + return 0; + list_del_rcu(&ha->list); + kfree_rcu(ha, rcu_head); + list->count--; + return 0; } static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, - unsigned char addr_type, bool global) + unsigned char addr_type, bool global, bool sync) { struct netdev_hw_addr *ha; list_for_each_entry(ha, &list->list, list) { if (!memcmp(ha->addr, addr, addr_len) && - (ha->type == addr_type || !addr_type)) { - if (global) { - if (!ha->global_use) - break; - else - ha->global_use = false; - } - if (--ha->refcount) - return 0; - list_del_rcu(&ha->list); - kfree_rcu(ha, rcu_head); - list->count--; - return 0; - } + (ha->type == addr_type || !addr_type)) + return __hw_addr_del_entry(list, ha, global, sync); } return -ENOENT; } @@ -108,7 +128,57 @@ static int __hw_addr_del(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { - return __hw_addr_del_ex(list, addr, addr_len, addr_type, false); + return __hw_addr_del_ex(list, addr, addr_len, addr_type, false, false); +} + +static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr *ha, + int addr_len) +{ + int err; + + err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type, + false, true); + if (err) + return err; + ha->sync_cnt++; + ha->refcount++; + + return 0; +} + +static void __hw_addr_unsync_one(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + struct netdev_hw_addr *ha, + int addr_len) +{ + int err; + + err = __hw_addr_del_ex(to_list, ha->addr, addr_len, ha->type, + false, true); + if (err) + return; + ha->sync_cnt--; + __hw_addr_del_entry(from_list, ha, false, true); +} + +static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len) +{ + int err = 0; + struct netdev_hw_addr *ha, *tmp; + + list_for_each_entry_safe(ha, tmp, &from_list->list, list) { + if (ha->sync_cnt == ha->refcount) { + __hw_addr_unsync_one(to_list, from_list, ha, addr_len); + } else { + err = __hw_addr_sync_one(to_list, ha, addr_len); + if (err) + break; + } + } + return err; } int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, @@ -152,6 +222,11 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, } EXPORT_SYMBOL(__hw_addr_del_multiple); +/* This function only works where there is a strict 1-1 relationship + * between source and destionation of they synch. If you ever need to + * sync addresses to more then 1 destination, you need to use + * __hw_addr_sync_multiple(). + */ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len) @@ -160,17 +235,12 @@ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &from_list->list, list) { - if (!ha->synced) { - err = __hw_addr_add(to_list, ha->addr, - addr_len, ha->type); + if (!ha->sync_cnt) { + err = __hw_addr_sync_one(to_list, ha, addr_len); if (err) break; - ha->synced = true; - ha->refcount++; - } else if (ha->refcount == 1) { - __hw_addr_del(to_list, ha->addr, addr_len, ha->type); - __hw_addr_del(from_list, ha->addr, addr_len, ha->type); - } + } else if (ha->refcount == 1) + __hw_addr_unsync_one(to_list, from_list, ha, addr_len); } return err; } @@ -183,13 +253,8 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &from_list->list, list) { - if (ha->synced) { - __hw_addr_del(to_list, ha->addr, - addr_len, ha->type); - ha->synced = false; - __hw_addr_del(from_list, ha->addr, - addr_len, ha->type); - } + if (ha->sync_cnt) + __hw_addr_unsync_one(to_list, from_list, ha, addr_len); } } EXPORT_SYMBOL(__hw_addr_unsync); @@ -406,7 +471,7 @@ int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr) } } err = __hw_addr_create_ex(&dev->uc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_UNICAST, true); + NETDEV_HW_ADDR_T_UNICAST, true, false); if (!err) __dev_set_rx_mode(dev); out: @@ -469,7 +534,8 @@ EXPORT_SYMBOL(dev_uc_del); * locked by netif_addr_lock_bh. * * This function is intended to be called from the dev->set_rx_mode - * function of layered software devices. + * function of layered software devices. This function assumes that + * addresses will only ever be synced to the @to devices and no other. */ int dev_uc_sync(struct net_device *to, struct net_device *from) { @@ -488,6 +554,36 @@ int dev_uc_sync(struct net_device *to, struct net_device *from) EXPORT_SYMBOL(dev_uc_sync); /** + * dev_uc_sync_multiple - Synchronize device's unicast list to another + * device, but allow for multiple calls to sync to multiple devices. + * @to: destination device + * @from: source device + * + * Add newly added addresses to the destination device and release + * addresses that have been deleted from the source. The source device + * must be locked by netif_addr_lock_bh. + * + * This function is intended to be called from the dev->set_rx_mode + * function of layered software devices. It allows for a single source + * device to be synced to multiple destination devices. + */ +int dev_uc_sync_multiple(struct net_device *to, struct net_device *from) +{ + int err = 0; + + if (to->addr_len != from->addr_len) + return -EINVAL; + + netif_addr_lock_nested(to); + err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len); + if (!err) + __dev_set_rx_mode(to); + netif_addr_unlock(to); + return err; +} +EXPORT_SYMBOL(dev_uc_sync_multiple); + +/** * dev_uc_unsync - Remove synchronized addresses from the destination device * @to: destination device * @from: source device @@ -559,7 +655,7 @@ int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr) } } err = __hw_addr_create_ex(&dev->mc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_MULTICAST, true); + NETDEV_HW_ADDR_T_MULTICAST, true, false); if (!err) __dev_set_rx_mode(dev); out: @@ -575,7 +671,7 @@ static int __dev_mc_add(struct net_device *dev, const unsigned char *addr, netif_addr_lock_bh(dev); err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_MULTICAST, global); + NETDEV_HW_ADDR_T_MULTICAST, global, false); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); @@ -615,7 +711,7 @@ static int __dev_mc_del(struct net_device *dev, const unsigned char *addr, netif_addr_lock_bh(dev); err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_MULTICAST, global); + NETDEV_HW_ADDR_T_MULTICAST, global, false); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); @@ -679,6 +775,36 @@ int dev_mc_sync(struct net_device *to, struct net_device *from) EXPORT_SYMBOL(dev_mc_sync); /** + * dev_mc_sync_multiple - Synchronize device's unicast list to another + * device, but allow for multiple calls to sync to multiple devices. + * @to: destination device + * @from: source device + * + * Add newly added addresses to the destination device and release + * addresses that have no users left. The source device must be + * locked by netif_addr_lock_bh. + * + * This function is intended to be called from the ndo_set_rx_mode + * function of layered software devices. It allows for a single + * source device to be synced to multiple destination devices. + */ +int dev_mc_sync_multiple(struct net_device *to, struct net_device *from) +{ + int err = 0; + + if (to->addr_len != from->addr_len) + return -EINVAL; + + netif_addr_lock_nested(to); + err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); + if (!err) + __dev_set_rx_mode(to); + netif_addr_unlock(to); + return err; +} +EXPORT_SYMBOL(dev_mc_sync_multiple); + +/** * dev_mc_unsync - Remove synchronized addresses from the destination device * @to: destination device * @from: source device diff --git a/net/core/dst.c b/net/core/dst.c index 35fd12f1a69c..df9cc810ec8e 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -320,27 +320,28 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) EXPORT_SYMBOL(__dst_destroy_metrics_generic); /** - * skb_dst_set_noref - sets skb dst, without a reference + * __skb_dst_set_noref - sets skb dst, without a reference * @skb: buffer * @dst: dst entry + * @force: if force is set, use noref version even for DST_NOCACHE entries * * Sets skb dst, assuming a reference was not taken on dst * skb_dst_drop() should not dst_release() this dst */ -void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) +void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, bool force) { WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); /* If dst not in cache, we must take a reference, because * dst_release() will destroy dst as soon as its refcount becomes zero */ - if (unlikely(dst->flags & DST_NOCACHE)) { + if (unlikely((dst->flags & DST_NOCACHE) && !force)) { dst_hold(dst); skb_dst_set(skb, dst); } else { skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; } } -EXPORT_SYMBOL(skb_dst_set_noref); +EXPORT_SYMBOL(__skb_dst_set_noref); /* Dirty hack. We did it in 2.2 (in __dst_free), * we have _very_ good reasons not to repeat diff --git a/net/core/flow.c b/net/core/flow.c index 707fb7b952e8..7102f166482d 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -346,7 +346,7 @@ static void flow_cache_flush_per_cpu(void *data) struct flow_flush_info *info = data; struct tasklet_struct *tasklet; - tasklet = this_cpu_ptr(&info->cache->percpu->flush_tasklet); + tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet; tasklet->data = (unsigned long)info; tasklet_schedule(tasklet); } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index c72a646d9f44..89a3a07d85fb 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -39,21 +39,13 @@ #include <linux/string.h> #include <linux/log2.h> +#define DEBUG #define NEIGH_DEBUG 1 - -#define NEIGH_PRINTK(x...) printk(x) -#define NEIGH_NOPRINTK(x...) do { ; } while(0) -#define NEIGH_PRINTK1 NEIGH_NOPRINTK -#define NEIGH_PRINTK2 NEIGH_NOPRINTK - -#if NEIGH_DEBUG >= 1 -#undef NEIGH_PRINTK1 -#define NEIGH_PRINTK1 NEIGH_PRINTK -#endif -#if NEIGH_DEBUG >= 2 -#undef NEIGH_PRINTK2 -#define NEIGH_PRINTK2 NEIGH_PRINTK -#endif +#define neigh_dbg(level, fmt, ...) \ +do { \ + if (level <= NEIGH_DEBUG) \ + pr_debug(fmt, ##__VA_ARGS__); \ +} while (0) #define PNEIGH_HASHMASK 0xF @@ -246,7 +238,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) n->nud_state = NUD_NOARP; else n->nud_state = NUD_NONE; - NEIGH_PRINTK2("neigh %p is stray.\n", n); + neigh_dbg(2, "neigh %p is stray\n", n); } write_unlock(&n->lock); neigh_cleanup_and_release(n); @@ -542,7 +534,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, lockdep_is_held(&tbl->lock))); rcu_assign_pointer(nht->hash_buckets[hash_val], n); write_unlock_bh(&tbl->lock); - NEIGH_PRINTK2("neigh %p is created.\n", n); + neigh_dbg(2, "neigh %p is created\n", n); rc = n; out: return rc; @@ -725,7 +717,7 @@ void neigh_destroy(struct neighbour *neigh) dev_put(dev); neigh_parms_put(neigh->parms); - NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); + neigh_dbg(2, "neigh %p is destroyed\n", neigh); atomic_dec(&neigh->tbl->entries); kfree_rcu(neigh, rcu); @@ -739,7 +731,7 @@ EXPORT_SYMBOL(neigh_destroy); */ static void neigh_suspect(struct neighbour *neigh) { - NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); + neigh_dbg(2, "neigh %p is suspected\n", neigh); neigh->output = neigh->ops->output; } @@ -751,7 +743,7 @@ static void neigh_suspect(struct neighbour *neigh) */ static void neigh_connect(struct neighbour *neigh) { - NEIGH_PRINTK2("neigh %p is connected.\n", neigh); + neigh_dbg(2, "neigh %p is connected\n", neigh); neigh->output = neigh->ops->connected_output; } @@ -852,7 +844,7 @@ static void neigh_invalidate(struct neighbour *neigh) struct sk_buff *skb; NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); - NEIGH_PRINTK2("neigh %p is failed.\n", neigh); + neigh_dbg(2, "neigh %p is failed\n", neigh); neigh->updated = jiffies; /* It is very thin place. report_unreachable is very complicated @@ -904,17 +896,17 @@ static void neigh_timer_handler(unsigned long arg) if (state & NUD_REACHABLE) { if (time_before_eq(now, neigh->confirmed + neigh->parms->reachable_time)) { - NEIGH_PRINTK2("neigh %p is still alive.\n", neigh); + neigh_dbg(2, "neigh %p is still alive\n", neigh); next = neigh->confirmed + neigh->parms->reachable_time; } else if (time_before_eq(now, neigh->used + neigh->parms->delay_probe_time)) { - NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); + neigh_dbg(2, "neigh %p is delayed\n", neigh); neigh->nud_state = NUD_DELAY; neigh->updated = jiffies; neigh_suspect(neigh); next = now + neigh->parms->delay_probe_time; } else { - NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); + neigh_dbg(2, "neigh %p is suspected\n", neigh); neigh->nud_state = NUD_STALE; neigh->updated = jiffies; neigh_suspect(neigh); @@ -923,14 +915,14 @@ static void neigh_timer_handler(unsigned long arg) } else if (state & NUD_DELAY) { if (time_before_eq(now, neigh->confirmed + neigh->parms->delay_probe_time)) { - NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh); + neigh_dbg(2, "neigh %p is now reachable\n", neigh); neigh->nud_state = NUD_REACHABLE; neigh->updated = jiffies; neigh_connect(neigh); notify = 1; next = neigh->confirmed + neigh->parms->reachable_time; } else { - NEIGH_PRINTK2("neigh %p is probed.\n", neigh); + neigh_dbg(2, "neigh %p is probed\n", neigh); neigh->nud_state = NUD_PROBE; neigh->updated = jiffies; atomic_set(&neigh->probes, 0); @@ -997,7 +989,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) return 1; } } else if (neigh->nud_state & NUD_STALE) { - NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); + neigh_dbg(2, "neigh %p is delayed\n", neigh); neigh->nud_state = NUD_DELAY; neigh->updated = jiffies; neigh_add_timer(neigh, @@ -1320,8 +1312,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) out: return rc; discard: - NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", - dst, neigh); + neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh); out_kfree_skb: rc = -EINVAL; kfree_skb(skb); @@ -1498,7 +1489,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) } } write_unlock_bh(&tbl->lock); - NEIGH_PRINTK1("neigh_parms_release: not found\n"); + neigh_dbg(1, "%s: not found\n", __func__); } EXPORT_SYMBOL(neigh_parms_release); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d2322d7f0f7b..589d0abb34a0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -496,8 +496,10 @@ static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) } if (ops->fill_info) { data = nla_nest_start(skb, IFLA_INFO_DATA); - if (data == NULL) + if (data == NULL) { + err = -EMSGSIZE; goto err_cancel_link; + } err = ops->fill_info(skb, dev); if (err < 0) goto err_cancel_data; diff --git a/net/core/scm.c b/net/core/scm.c index 905dcc6ad1e3..03795d0147f2 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -24,6 +24,7 @@ #include <linux/interrupt.h> #include <linux/netdevice.h> #include <linux/security.h> +#include <linux/pid_namespace.h> #include <linux/pid.h> #include <linux/nsproxy.h> #include <linux/slab.h> @@ -52,7 +53,8 @@ static __inline__ int scm_check_creds(struct ucred *creds) if (!uid_valid(uid) || !gid_valid(gid)) return -EINVAL; - if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) && + if ((creds->pid == task_tgid_vnr(current) || + ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) && ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || @@ -185,22 +187,6 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) p->creds.uid = uid; p->creds.gid = gid; - - if (!p->cred || - !uid_eq(p->cred->euid, uid) || - !gid_eq(p->cred->egid, gid)) { - struct cred *cred; - err = -ENOMEM; - cred = prepare_creds(); - if (!cred) - goto error; - - cred->uid = cred->euid = uid; - cred->gid = cred->egid = gid; - if (p->cred) - put_cred(p->cred); - p->cred = cred; - } break; } default: @@ -304,8 +290,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) /* Bump the usage count and install the file. */ sock = sock_from_file(fp[i], &err); if (sock) { - sock_update_netprioidx(sock->sk, current); - sock_update_classid(sock->sk, current); + sock_update_netprioidx(sock->sk); + sock_update_classid(sock->sk); } fd_install(new_fd, get_file(fp[i])); } diff --git a/net/core/sock.c b/net/core/sock.c index a19e728d5fb6..d4f4cea726e7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -907,6 +907,10 @@ set_rcvbuf: sock_valbool_flag(sk, SOCK_NOFCS, valbool); break; + case SO_SELECT_ERR_QUEUE: + sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); + break; + default: ret = -ENOPROTOOPT; break; @@ -1160,6 +1164,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sock_flag(sk, SOCK_FILTER_LOCKED); break; + case SO_SELECT_ERR_QUEUE: + v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); + break; + default: return -ENOPROTOOPT; } @@ -1299,11 +1307,11 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) } #if IS_ENABLED(CONFIG_NET_CLS_CGROUP) -void sock_update_classid(struct sock *sk, struct task_struct *task) +void sock_update_classid(struct sock *sk) { u32 classid; - classid = task_cls_classid(task); + classid = task_cls_classid(current); if (classid != sk->sk_classid) sk->sk_classid = classid; } @@ -1311,12 +1319,12 @@ EXPORT_SYMBOL(sock_update_classid); #endif #if IS_ENABLED(CONFIG_NETPRIO_CGROUP) -void sock_update_netprioidx(struct sock *sk, struct task_struct *task) +void sock_update_netprioidx(struct sock *sk) { if (in_interrupt()) return; - sk->sk_cgrp_prioidx = task_netprioidx(task); + sk->sk_cgrp_prioidx = task_netprioidx(current); } EXPORT_SYMBOL_GPL(sock_update_netprioidx); #endif @@ -1345,8 +1353,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_net_set(sk, get_net(net)); atomic_set(&sk->sk_wmem_alloc, 1); - sock_update_classid(sk, current); - sock_update_netprioidx(sk, current); + sock_update_classid(sk); + sock_update_netprioidx(sk); } return sk; diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index b15c1d1720fb..86e3807052e9 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -19,7 +19,6 @@ #include <linux/sockios.h> #include <linux/init.h> #include <linux/skbuff.h> -#include <net/netlink.h> #include <linux/rtnetlink.h> #include <linux/proc_fs.h> #include <linux/netdevice.h> diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index e1b4580f78dd..55e1fd5b3e56 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1139,10 +1139,10 @@ static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) error: dev_kfree_skb(skb); out: - if (err < 0) + if (err) pr_debug("ERROR: xmit failed\n"); - return (err < 0 ? NETDEV_TX_BUSY : NETDEV_TX_OK); + return (err < 0) ? NET_XMIT_DROP : err; } static struct wpan_phy *lowpan_get_phy(const struct net_device *dev) diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 96bb08abece2..b0bdd8c51e9c 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -315,7 +315,7 @@ static int ieee802154_associate_req(struct sk_buff *skb, struct net_device *dev; struct ieee802154_addr addr; u8 page; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if (!info->attrs[IEEE802154_ATTR_CHANNEL] || !info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || @@ -327,6 +327,8 @@ static int ieee802154_associate_req(struct sk_buff *skb, dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->assoc_req) + goto out; if (info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]) { addr.addr_type = IEEE802154_ADDR_LONG; @@ -350,6 +352,7 @@ static int ieee802154_associate_req(struct sk_buff *skb, page, nla_get_u8(info->attrs[IEEE802154_ATTR_CAPABILITY])); +out: dev_put(dev); return ret; } @@ -359,7 +362,7 @@ static int ieee802154_associate_resp(struct sk_buff *skb, { struct net_device *dev; struct ieee802154_addr addr; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if (!info->attrs[IEEE802154_ATTR_STATUS] || !info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] || @@ -369,6 +372,8 @@ static int ieee802154_associate_resp(struct sk_buff *skb, dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->assoc_resp) + goto out; addr.addr_type = IEEE802154_ADDR_LONG; nla_memcpy(addr.hwaddr, info->attrs[IEEE802154_ATTR_DEST_HW_ADDR], @@ -380,6 +385,7 @@ static int ieee802154_associate_resp(struct sk_buff *skb, nla_get_u16(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]), nla_get_u8(info->attrs[IEEE802154_ATTR_STATUS])); +out: dev_put(dev); return ret; } @@ -389,7 +395,7 @@ static int ieee802154_disassociate_req(struct sk_buff *skb, { struct net_device *dev; struct ieee802154_addr addr; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if ((!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] && !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) || @@ -399,6 +405,8 @@ static int ieee802154_disassociate_req(struct sk_buff *skb, dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->disassoc_req) + goto out; if (info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]) { addr.addr_type = IEEE802154_ADDR_LONG; @@ -415,6 +423,7 @@ static int ieee802154_disassociate_req(struct sk_buff *skb, ret = ieee802154_mlme_ops(dev)->disassoc_req(dev, &addr, nla_get_u8(info->attrs[IEEE802154_ATTR_REASON])); +out: dev_put(dev); return ret; } @@ -432,7 +441,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) u8 channel, bcn_ord, sf_ord; u8 page; int pan_coord, blx, coord_realign; - int ret; + int ret = -EOPNOTSUPP; if (!info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || !info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR] || @@ -448,6 +457,8 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->start_req) + goto out; addr.addr_type = IEEE802154_ADDR_SHORT; addr.short_addr = nla_get_u16( @@ -476,6 +487,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, page, bcn_ord, sf_ord, pan_coord, blx, coord_realign); +out: dev_put(dev); return ret; } @@ -483,7 +495,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) { struct net_device *dev; - int ret; + int ret = -EOPNOTSUPP; u8 type; u32 channels; u8 duration; @@ -497,6 +509,8 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->scan_req) + goto out; type = nla_get_u8(info->attrs[IEEE802154_ATTR_SCAN_TYPE]); channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]); @@ -511,6 +525,7 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page, duration); +out: dev_put(dev); return ret; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5d985e367535..2759dfd576ae 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -802,8 +802,10 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) if (nlh->nlmsg_flags & NLM_F_EXCL || !(nlh->nlmsg_flags & NLM_F_REPLACE)) return -EEXIST; - - set_ifa_lifetime(ifa_existing, valid_lft, prefered_lft); + ifa = ifa_existing; + set_ifa_lifetime(ifa, valid_lft, prefered_lft); + rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); + blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); } return 0; } diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 1206ca64b0ea..e97d66a1fdde 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -52,20 +52,27 @@ static void inet_frag_secret_rebuild(unsigned long dummy) unsigned long now = jiffies; int i; + /* Per bucket lock NOT needed here, due to write lock protection */ write_lock(&f->lock); + get_random_bytes(&f->rnd, sizeof(u32)); for (i = 0; i < INETFRAGS_HASHSZ; i++) { + struct inet_frag_bucket *hb; struct inet_frag_queue *q; struct hlist_node *n; - hlist_for_each_entry_safe(q, n, &f->hash[i], list) { + hb = &f->hash[i]; + hlist_for_each_entry_safe(q, n, &hb->chain, list) { unsigned int hval = f->hashfn(q); if (hval != i) { + struct inet_frag_bucket *hb_dest; + hlist_del(&q->list); /* Relink to new hash chain. */ - hlist_add_head(&q->list, &f->hash[hval]); + hb_dest = &f->hash[hval]; + hlist_add_head(&q->list, &hb_dest->chain); } } } @@ -78,9 +85,12 @@ void inet_frags_init(struct inet_frags *f) { int i; - for (i = 0; i < INETFRAGS_HASHSZ; i++) - INIT_HLIST_HEAD(&f->hash[i]); + for (i = 0; i < INETFRAGS_HASHSZ; i++) { + struct inet_frag_bucket *hb = &f->hash[i]; + spin_lock_init(&hb->chain_lock); + INIT_HLIST_HEAD(&hb->chain); + } rwlock_init(&f->lock); f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ @@ -122,9 +132,18 @@ EXPORT_SYMBOL(inet_frags_exit_net); static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) { - write_lock(&f->lock); + struct inet_frag_bucket *hb; + unsigned int hash; + + read_lock(&f->lock); + hash = f->hashfn(fq); + hb = &f->hash[hash]; + + spin_lock(&hb->chain_lock); hlist_del(&fq->list); - write_unlock(&f->lock); + spin_unlock(&hb->chain_lock); + + read_unlock(&f->lock); inet_frag_lru_del(fq); } @@ -226,27 +245,32 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, struct inet_frag_queue *qp_in, struct inet_frags *f, void *arg) { + struct inet_frag_bucket *hb; struct inet_frag_queue *qp; #ifdef CONFIG_SMP #endif unsigned int hash; - write_lock(&f->lock); + read_lock(&f->lock); /* Protects against hash rebuild */ /* * While we stayed w/o the lock other CPU could update * the rnd seed, so we need to re-calculate the hash * chain. Fortunatelly the qp_in can be used to get one. */ hash = f->hashfn(qp_in); + hb = &f->hash[hash]; + spin_lock(&hb->chain_lock); + #ifdef CONFIG_SMP /* With SMP race we have to recheck hash table, because * such entry could be created on other cpu, while we - * promoted read lock to write lock. + * released the hash bucket lock. */ - hlist_for_each_entry(qp, &f->hash[hash], list) { + hlist_for_each_entry(qp, &hb->chain, list) { if (qp->net == nf && f->match(qp, arg)) { atomic_inc(&qp->refcnt); - write_unlock(&f->lock); + spin_unlock(&hb->chain_lock); + read_unlock(&f->lock); qp_in->last_in |= INET_FRAG_COMPLETE; inet_frag_put(qp_in, f); return qp; @@ -258,8 +282,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, atomic_inc(&qp->refcnt); atomic_inc(&qp->refcnt); - hlist_add_head(&qp->list, &f->hash[hash]); - write_unlock(&f->lock); + hlist_add_head(&qp->list, &hb->chain); + spin_unlock(&hb->chain_lock); + read_unlock(&f->lock); inet_frag_lru_add(nf, qp); return qp; } @@ -300,17 +325,23 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frags *f, void *key, unsigned int hash) __releases(&f->lock) { + struct inet_frag_bucket *hb; struct inet_frag_queue *q; int depth = 0; - hlist_for_each_entry(q, &f->hash[hash], list) { + hb = &f->hash[hash]; + + spin_lock(&hb->chain_lock); + hlist_for_each_entry(q, &hb->chain, list) { if (q->net == nf && f->match(q, key)) { atomic_inc(&q->refcnt); + spin_unlock(&hb->chain_lock); read_unlock(&f->lock); return q; } depth++; } + spin_unlock(&hb->chain_lock); read_unlock(&f->lock); if (depth <= INETFRAGS_MAXDEPTH) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index ad662e906f7e..987a4e5e07e2 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -159,14 +159,14 @@ static int ip_gre_calc_hlen(__be16 o_flags) static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err, int *hdr_len) { - struct iphdr *iph = ip_hdr(skb); - struct gre_base_hdr *greh; + unsigned int ip_hlen = ip_hdrlen(skb); + const struct gre_base_hdr *greh; __be32 *options; if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) return -EINVAL; - greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2)); + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) return -EINVAL; @@ -176,6 +176,8 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, if (!pskb_may_pull(skb, *hdr_len)) return -EINVAL; + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); + tpi->proto = greh->protocol; options = (__be32 *)(greh + 1); @@ -660,7 +662,6 @@ static void __gre_tunnel_init(struct net_device *dev) dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; - dev->iflink = 0; dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES; dev->hw_features |= GRE_FEATURES; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 5e12dca7b3dd..147abf5275aa 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -430,8 +430,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) to->nf_trace = from->nf_trace; #endif #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index f01d1b1aff7f..59cb8c769056 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -75,6 +75,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->props.mode = x->props.mode; t->props.saddr.a4 = x->props.saddr.a4; t->props.flags = x->props.flags; + t->props.extra_flags = x->props.extra_flags; memcpy(&t->mark, &x->mark, sizeof(t->mark)); if (xfrm_init_state(t)) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index bf6c5cf31aed..efa1138fa523 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -206,7 +206,7 @@ static int __init ic_open_devs(void) struct ic_device *d, **last; struct net_device *dev; unsigned short oflags; - unsigned long start; + unsigned long start, next_msg; last = &ic_first_dev; rtnl_lock(); @@ -263,12 +263,23 @@ static int __init ic_open_devs(void) /* wait for a carrier on at least one device */ start = jiffies; + next_msg = start + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12); while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) { + int wait, elapsed; + for_each_netdev(&init_net, dev) if (ic_is_init_dev(dev) && netif_carrier_ok(dev)) goto have_carrier; msleep(1); + + if time_before(jiffies, next_msg) + continue; + + elapsed = jiffies_to_msecs(jiffies - start); + wait = (CONF_CARRIER_TIMEOUT - elapsed + 500)/1000; + pr_info("Waiting up to %d more seconds for network.\n", wait); + next_msg = jiffies + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12); } have_carrier: rtnl_unlock(); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 3efcf87400c3..e391db1f056d 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -182,8 +182,7 @@ ipt_get_target_c(const struct ipt_entry *e) return ipt_get_target((struct ipt_entry *)e); } -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) static const char *const hooknames[] = { [NF_INET_PRE_ROUTING] = "PREROUTING", [NF_INET_LOCAL_IN] = "INPUT", @@ -259,6 +258,7 @@ static void trace_packet(const struct sk_buff *skb, const char *hookname, *chainname, *comment; const struct ipt_entry *iter; unsigned int rulenum = 0; + struct net *net = dev_net(in ? in : out); table_base = private->entries[smp_processor_id()]; root = get_entry(table_base, private->hook_entry[hook]); @@ -271,7 +271,7 @@ static void trace_packet(const struct sk_buff *skb, &chainname, &comment, &rulenum) != 0) break; - nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo, + nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", tablename, chainname, comment, rulenum); } @@ -361,8 +361,7 @@ ipt_do_table(struct sk_buff *skb, t = ipt_get_target(e); IP_NF_ASSERT(t->u.kernel.target); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ if (unlikely(skb->nf_trace)) trace_packet(skb, hook, in, out, diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index e7f8cad11393..8799c836ccaa 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -45,6 +45,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ipt_ULOG.h> #include <net/netfilter/nf_log.h> +#include <net/netns/generic.h> #include <net/sock.h> #include <linux/bitops.h> #include <asm/unaligned.h> @@ -78,15 +79,23 @@ typedef struct { struct timer_list timer; /* the timer function */ } ulog_buff_t; -static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */ +static int ulog_net_id __read_mostly; +struct ulog_net { + unsigned int nlgroup[ULOG_MAXNLGROUPS]; + ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; + struct sock *nflognl; + spinlock_t lock; +}; -static struct sock *nflognl; /* our socket */ -static DEFINE_SPINLOCK(ulog_lock); /* spinlock */ +static struct ulog_net *ulog_pernet(struct net *net) +{ + return net_generic(net, ulog_net_id); +} /* send one ulog_buff_t to userspace */ -static void ulog_send(unsigned int nlgroupnum) +static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum) { - ulog_buff_t *ub = &ulog_buffers[nlgroupnum]; + ulog_buff_t *ub = &ulog->ulog_buffers[nlgroupnum]; pr_debug("ulog_send: timer is deleting\n"); del_timer(&ub->timer); @@ -103,7 +112,8 @@ static void ulog_send(unsigned int nlgroupnum) NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1; pr_debug("throwing %d packets to netlink group %u\n", ub->qlen, nlgroupnum + 1); - netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC); + netlink_broadcast(ulog->nflognl, ub->skb, 0, nlgroupnum + 1, + GFP_ATOMIC); ub->qlen = 0; ub->skb = NULL; @@ -114,13 +124,16 @@ static void ulog_send(unsigned int nlgroupnum) /* timer function to flush queue in flushtimeout time */ static void ulog_timer(unsigned long data) { + struct ulog_net *ulog = container_of((void *)data, + struct ulog_net, + nlgroup[*(unsigned int *)data]); pr_debug("timer function called, calling ulog_send\n"); /* lock to protect against somebody modifying our structure * from ipt_ulog_target at the same time */ - spin_lock_bh(&ulog_lock); - ulog_send(data); - spin_unlock_bh(&ulog_lock); + spin_lock_bh(&ulog->lock); + ulog_send(ulog, data); + spin_unlock_bh(&ulog->lock); } static struct sk_buff *ulog_alloc_skb(unsigned int size) @@ -160,6 +173,8 @@ static void ipt_ulog_packet(unsigned int hooknum, size_t size, copy_len; struct nlmsghdr *nlh; struct timeval tv; + struct net *net = dev_net(in ? in : out); + struct ulog_net *ulog = ulog_pernet(net); /* ffs == find first bit set, necessary because userspace * is already shifting groupnumber, but we need unshifted. @@ -174,9 +189,9 @@ static void ipt_ulog_packet(unsigned int hooknum, size = nlmsg_total_size(sizeof(*pm) + copy_len); - ub = &ulog_buffers[groupnum]; + ub = &ulog->ulog_buffers[groupnum]; - spin_lock_bh(&ulog_lock); + spin_lock_bh(&ulog->lock); if (!ub->skb) { if (!(ub->skb = ulog_alloc_skb(size))) @@ -186,7 +201,7 @@ static void ipt_ulog_packet(unsigned int hooknum, /* either the queue len is too high or we don't have * enough room in nlskb left. send it to userspace. */ - ulog_send(groupnum); + ulog_send(ulog, groupnum); if (!(ub->skb = ulog_alloc_skb(size))) goto alloc_failure; @@ -260,16 +275,16 @@ static void ipt_ulog_packet(unsigned int hooknum, if (ub->qlen >= loginfo->qthreshold) { if (loginfo->qthreshold > 1) nlh->nlmsg_type = NLMSG_DONE; - ulog_send(groupnum); + ulog_send(ulog, groupnum); } out_unlock: - spin_unlock_bh(&ulog_lock); + spin_unlock_bh(&ulog->lock); return; alloc_failure: pr_debug("Error building netlink message\n"); - spin_unlock_bh(&ulog_lock); + spin_unlock_bh(&ulog->lock); } static unsigned int @@ -376,54 +391,43 @@ static struct nf_logger ipt_ulog_logger __read_mostly = { .me = THIS_MODULE, }; -static int __init ulog_tg_init(void) +static int __net_init ulog_tg_net_init(struct net *net) { - int ret, i; + int i; + struct ulog_net *ulog = ulog_pernet(net); struct netlink_kernel_cfg cfg = { .groups = ULOG_MAXNLGROUPS, }; - pr_debug("init module\n"); - - if (nlbufsiz > 128*1024) { - pr_warning("Netlink buffer has to be <= 128kB\n"); - return -EINVAL; - } - + spin_lock_init(&ulog->lock); /* initialize ulog_buffers */ for (i = 0; i < ULOG_MAXNLGROUPS; i++) - setup_timer(&ulog_buffers[i].timer, ulog_timer, i); + setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer, i); - nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg); - if (!nflognl) + ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg); + if (!ulog->nflognl) return -ENOMEM; - ret = xt_register_target(&ulog_tg_reg); - if (ret < 0) { - netlink_kernel_release(nflognl); - return ret; - } if (nflog) - nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger); + nf_log_set(net, NFPROTO_IPV4, &ipt_ulog_logger); return 0; } -static void __exit ulog_tg_exit(void) +static void __net_exit ulog_tg_net_exit(struct net *net) { ulog_buff_t *ub; int i; - - pr_debug("cleanup_module\n"); + struct ulog_net *ulog = ulog_pernet(net); if (nflog) - nf_log_unregister(&ipt_ulog_logger); - xt_unregister_target(&ulog_tg_reg); - netlink_kernel_release(nflognl); + nf_log_unset(net, &ipt_ulog_logger); + + netlink_kernel_release(ulog->nflognl); /* remove pending timers and free allocated skb's */ for (i = 0; i < ULOG_MAXNLGROUPS; i++) { - ub = &ulog_buffers[i]; + ub = &ulog->ulog_buffers[i]; pr_debug("timer is deleting\n"); del_timer(&ub->timer); @@ -434,5 +438,50 @@ static void __exit ulog_tg_exit(void) } } +static struct pernet_operations ulog_tg_net_ops = { + .init = ulog_tg_net_init, + .exit = ulog_tg_net_exit, + .id = &ulog_net_id, + .size = sizeof(struct ulog_net), +}; + +static int __init ulog_tg_init(void) +{ + int ret; + pr_debug("init module\n"); + + if (nlbufsiz > 128*1024) { + pr_warn("Netlink buffer has to be <= 128kB\n"); + return -EINVAL; + } + + ret = register_pernet_subsys(&ulog_tg_net_ops); + if (ret) + goto out_pernet; + + ret = xt_register_target(&ulog_tg_reg); + if (ret < 0) + goto out_target; + + if (nflog) + nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger); + + return 0; + +out_target: + unregister_pernet_subsys(&ulog_tg_net_ops); +out_pernet: + return ret; +} + +static void __exit ulog_tg_exit(void) +{ + pr_debug("cleanup_module\n"); + if (nflog) + nf_log_unregister(&ipt_ulog_logger); + xt_unregister_target(&ulog_tg_reg); + unregister_pernet_subsys(&ulog_tg_net_ops); +} + module_init(ulog_tg_init); module_exit(ulog_tg_exit); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 5241d997ab75..c2cd63d2d892 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -187,8 +187,8 @@ icmp_error(struct net *net, struct nf_conn *tmpl, icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); if (icmph == NULL) { if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, - "nf_ct_icmp: short packet "); + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, + NULL, "nf_ct_icmp: short packet "); return -NF_ACCEPT; } @@ -196,7 +196,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip_checksum(skb, hooknum, dataoff, 0)) { if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: bad HW ICMP checksum "); return -NF_ACCEPT; } @@ -209,7 +209,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, */ if (icmph->type > NR_ICMP_TYPES) { if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: invalid ICMP type "); return -NF_ACCEPT; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2e91006d6076..7d93d62cd5fd 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -514,9 +514,8 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.opt = NULL; ipc.oif = sk->sk_bound_dev_if; ipc.tx_flags = 0; - err = sock_tx_timestamp(sk, &ipc.tx_flags); - if (err) - return err; + + sock_tx_timestamp(sk, &ipc.tx_flags); if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a96f7b586277..dcb116dde216 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2885,6 +2885,8 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, __be32 delta; unsigned int oldlen; unsigned int mss; + struct sk_buff *gso_skb = skb; + __sum16 newcheck; if (!pskb_may_pull(skb, sizeof(*th))) goto out; @@ -2935,11 +2937,13 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, th = tcp_hdr(skb); seq = ntohl(th->seq); + newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + + (__force u32)delta)); + do { th->fin = th->psh = 0; + th->check = newcheck; - th->check = ~csum_fold((__force __wsum)((__force u32)th->check + - (__force u32)delta)); if (skb->ip_summed != CHECKSUM_PARTIAL) th->check = csum_fold(csum_partial(skb_transport_header(skb), @@ -2953,6 +2957,17 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, th->cwr = 0; } while (skb->next); + /* Following permits TCP Small Queues to work well with GSO : + * The callback to TCP stack will be called at the time last frag + * is freed at TX completion, and not right now when gso_skb + * is freed by GSO engine + */ + if (gso_skb->destructor == tcp_wfree) { + swap(gso_skb->sk, skb->sk); + swap(gso_skb->destructor, skb->destructor); + swap(gso_skb->truesize, skb->truesize); + } + delta = htonl(oldlen + (skb->tail - skb->transport_header) + skb->data_len); th->check = ~csum_fold((__force __wsum)((__force u32)th->check + diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index b6f3583ddfe8..da14436c1735 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -64,7 +64,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg) { struct cg_proto *cg_proto; struct tcp_memcontrol *tcp; - u64 val; cg_proto = tcp_prot.proto_cgroup(memcg); if (!cg_proto) @@ -72,8 +71,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg) tcp = tcp_from_cgproto(cg_proto); percpu_counter_destroy(&tcp->tcp_sockets_allocated); - - val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); } EXPORT_SYMBOL(tcp_destroy_cgroup); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index af354c98fdb5..d12694353540 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -787,7 +787,7 @@ void __init tcp_tasklet_init(void) * We cant xmit new skbs from this context, as we might already * hold qdisc lock. */ -static void tcp_wfree(struct sk_buff *skb) +void tcp_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7117d1467b02..2722db024a0b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -902,9 +902,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.addr = inet->inet_saddr; ipc.oif = sk->sk_bound_dev_if; - err = sock_tx_timestamp(sk, &ipc.tx_flags); - if (err) - return err; + + sock_tx_timestamp(sk, &ipc.tx_flags); + if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc); if (err) diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index fe5189e2e114..eb1dd4d643f2 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -103,8 +103,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family); - /* DS disclosed */ - top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos, + /* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */ + if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) + top_iph->tos = 0; + else + top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos; + top_iph->tos = INET_ECN_encapsulate(top_iph->tos, XFRM_MODE_SKB_CB(skb)->tos); flags = x->props.flags; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d6279cb3d547..28b61e89bbb8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -422,6 +422,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ipv6_regen_rndid((unsigned long) ndev); } #endif + ndev->token = in6addr_any; if (netif_running(dev) && addrconf_qdisc_ok(dev)) ndev->if_flags |= IF_READY; @@ -877,6 +878,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, ifa->prefix_len = pfxlen; ifa->flags = flags | IFA_F_TENTATIVE; ifa->cstamp = ifa->tstamp = jiffies; + ifa->tokenized = false; ifa->rt = rt; @@ -2133,11 +2135,19 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) struct inet6_ifaddr *ifp; struct in6_addr addr; int create = 0, update_lft = 0; + bool tokenized = false; if (pinfo->prefix_len == 64) { memcpy(&addr, &pinfo->prefix, 8); - if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && - ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { + + if (!ipv6_addr_any(&in6_dev->token)) { + read_lock_bh(&in6_dev->lock); + memcpy(addr.s6_addr + 8, + in6_dev->token.s6_addr + 8, 8); + read_unlock_bh(&in6_dev->lock); + tokenized = true; + } else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && + ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { in6_dev_put(in6_dev); return; } @@ -2178,6 +2188,7 @@ ok: update_lft = create = 1; ifp->cstamp = jiffies; + ifp->tokenized = tokenized; addrconf_dad_start(ifp); } @@ -2616,6 +2627,9 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) static void init_loopback(struct net_device *dev) { struct inet6_dev *idev; + struct net_device *sp_dev; + struct inet6_ifaddr *sp_ifa; + struct rt6_info *sp_rt; /* ::1 */ @@ -2627,6 +2641,30 @@ static void init_loopback(struct net_device *dev) } add_addr(idev, &in6addr_loopback, 128, IFA_HOST); + + /* Add routes to other interface's IPv6 addresses */ + for_each_netdev(dev_net(dev), sp_dev) { + if (!strcmp(sp_dev->name, dev->name)) + continue; + + idev = __in6_dev_get(sp_dev); + if (!idev) + continue; + + read_lock_bh(&idev->lock); + list_for_each_entry(sp_ifa, &idev->addr_list, if_list) { + + if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE)) + continue; + + sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0); + + /* Failure cases are ignored */ + if (!IS_ERR(sp_rt)) + ip6_ins_rt(sp_rt); + } + read_unlock_bh(&idev->lock); + } } static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr) @@ -4138,7 +4176,8 @@ static inline size_t inet6_ifla6_size(void) + nla_total_size(sizeof(struct ifla_cacheinfo)) + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */ + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */ - + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */ + + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */ + + nla_total_size(sizeof(struct in6_addr)); /* IFLA_INET6_TOKEN */ } static inline size_t inet6_if_nlmsg_size(void) @@ -4225,6 +4264,13 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) goto nla_put_failure; snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); + nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr)); + if (nla == NULL) + goto nla_put_failure; + read_lock_bh(&idev->lock); + memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla)); + read_unlock_bh(&idev->lock); + return 0; nla_put_failure: @@ -4252,6 +4298,80 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev) return 0; } +static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) +{ + struct inet6_ifaddr *ifp; + struct net_device *dev = idev->dev; + bool update_rs = false; + + if (token == NULL) + return -EINVAL; + if (ipv6_addr_any(token)) + return -EINVAL; + if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) + return -EINVAL; + if (!ipv6_accept_ra(idev)) + return -EINVAL; + if (idev->cnf.rtr_solicits <= 0) + return -EINVAL; + + write_lock_bh(&idev->lock); + + BUILD_BUG_ON(sizeof(token->s6_addr) != 16); + memcpy(idev->token.s6_addr + 8, token->s6_addr + 8, 8); + + write_unlock_bh(&idev->lock); + + if (!idev->dead && (idev->if_flags & IF_READY)) { + struct in6_addr ll_addr; + + ipv6_get_lladdr(dev, &ll_addr, IFA_F_TENTATIVE | + IFA_F_OPTIMISTIC); + + /* If we're not ready, then normal ifup will take care + * of this. Otherwise, we need to request our rs here. + */ + ndisc_send_rs(dev, &ll_addr, &in6addr_linklocal_allrouters); + update_rs = true; + } + + write_lock_bh(&idev->lock); + + if (update_rs) + idev->if_flags |= IF_RS_SENT; + + /* Well, that's kinda nasty ... */ + list_for_each_entry(ifp, &idev->addr_list, if_list) { + spin_lock(&ifp->lock); + if (ifp->tokenized) { + ifp->valid_lft = 0; + ifp->prefered_lft = 0; + } + spin_unlock(&ifp->lock); + } + + write_unlock_bh(&idev->lock); + return 0; +} + +static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) +{ + int err = -EINVAL; + struct inet6_dev *idev = __in6_dev_get(dev); + struct nlattr *tb[IFLA_INET6_MAX + 1]; + + if (!idev) + return -EAFNOSUPPORT; + + if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0) + BUG(); + + if (tb[IFLA_INET6_TOKEN]) + err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN])); + + return err; +} + static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, u32 portid, u32 seq, int event, unsigned int flags) { @@ -4954,6 +5074,7 @@ static struct rtnl_af_ops inet6_ops = { .family = AF_INET6, .fill_link_af = inet6_fill_link_af, .get_link_af_size = inet6_get_link_af_size, + .set_link_af = inet6_set_link_af, }; /* diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index e33fe0ab2568..2bab2aa59745 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -118,6 +118,18 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt ipv6_addr_loopback(&hdr->daddr)) goto err; + /* RFC4291 Errata ID: 3480 + * Interface-Local scope spans only a single interface on a + * node and is useful only for loopback transmission of + * multicast. Packets with interface-local scope received + * from another node must be discarded. + */ + if (!(skb->pkt_type == PACKET_LOOPBACK || + dev->flags & IFF_LOOPBACK) && + ipv6_addr_is_multicast(&hdr->daddr) && + IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1) + goto err; + /* RFC4291 2.7 * Nodes must not originate a packet to a multicast address whose scope * field contains the reserved value 0; if such a packet is received, it diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 155eccfa7760..d2eedf192330 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1224,11 +1224,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, } /* For UDP, check if TX timestamp is enabled */ - if (sk->sk_type == SOCK_DGRAM) { - err = sock_tx_timestamp(sk, &tx_flags); - if (err) - goto error; - } + if (sk->sk_type == SOCK_DGRAM) + sock_tx_timestamp(sk, &tx_flags); /* * Let's try using as much space as possible. diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 341b54ade72c..8861b1ef420e 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -284,6 +284,7 @@ static void trace_packet(const struct sk_buff *skb, const char *hookname, *chainname, *comment; const struct ip6t_entry *iter; unsigned int rulenum = 0; + struct net *net = dev_net(in ? in : out); table_base = private->entries[smp_processor_id()]; root = get_entry(table_base, private->hook_entry[hook]); @@ -296,7 +297,7 @@ static void trace_packet(const struct sk_buff *skb, &chainname, &comment, &rulenum) != 0) break; - nf_log_packet(AF_INET6, hook, skb, in, out, &trace_loginfo, + nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", tablename, chainname, comment, rulenum); } diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c index 938e0b7ea1bd..590f767db5d4 100644 --- a/net/ipv6/netfilter/ip6t_NPT.c +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -52,7 +52,7 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, if (pfx_len - i >= 32) mask = 0; else - mask = htonl(~((1 << (pfx_len - i)) - 1)); + mask = htonl((1 << (i - pfx_len + 32)) - 1); idx = i / 32; addr->s6_addr32[idx] &= mask; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 24df3dde0076..b3807c5cb888 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -131,7 +131,8 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, type + 128); nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple); if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6)) - nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, + nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL, + NULL, NULL, "nf_ct_icmpv6: invalid new with type %d ", type + 128); return false; @@ -203,7 +204,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); if (icmp6h == NULL) { if (LOG_INVALID(net, IPPROTO_ICMPV6)) - nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: short packet "); return -NF_ACCEPT; } @@ -211,7 +212,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { if (LOG_INVALID(net, IPPROTO_ICMPV6)) - nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: ICMPv6 checksum failed "); return -NF_ACCEPT; } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6700069949dd..dffdc1a389c5 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -41,6 +41,7 @@ #include <net/rawv6.h> #include <net/ndisc.h> #include <net/addrconf.h> +#include <net/inet_ecn.h> #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> #include <linux/sysctl.h> #include <linux/netfilter.h> @@ -138,6 +139,11 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) } #endif +static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) +{ + return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); +} + static unsigned int nf_hashfn(struct inet_frag_queue *q) { const struct frag_queue *nq; @@ -166,7 +172,7 @@ static void nf_ct_frag6_expire(unsigned long data) /* Creation primitives. */ static inline struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, struct in6_addr *src, - struct in6_addr *dst) + struct in6_addr *dst, u8 ecn) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -176,6 +182,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, arg.user = user; arg.src = src; arg.dst = dst; + arg.ecn = ecn; read_lock_bh(&nf_frags.lock); hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); @@ -196,6 +203,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, struct sk_buff *prev, *next; unsigned int payload_len; int offset, end; + u8 ecn; if (fq->q.last_in & INET_FRAG_COMPLETE) { pr_debug("Already completed\n"); @@ -213,6 +221,8 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, return -1; } + ecn = ip6_frag_ecn(ipv6_hdr(skb)); + if (skb->ip_summed == CHECKSUM_COMPLETE) { const unsigned char *nh = skb_network_header(skb); skb->csum = csum_sub(skb->csum, @@ -317,6 +327,7 @@ found: } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; + fq->ecn |= ecn; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; add_frag_mem_limit(&fq->q, skb->truesize); @@ -352,12 +363,17 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) { struct sk_buff *fp, *op, *head = fq->q.fragments; int payload_len; + u8 ecn; inet_frag_kill(&fq->q, &nf_frags); WARN_ON(head == NULL); WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); + ecn = ip_frag_ecn_table[fq->ecn]; + if (unlikely(ecn == 0xff)) + goto out_fail; + /* Unfragmented part is taken from the first segment. */ payload_len = ((head->data - skb_network_header(head)) - sizeof(struct ipv6hdr) + fq->q.len - @@ -428,6 +444,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) head->dev = dev; head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); + ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; /* Yes, and fold redundant checksum back. 8) */ @@ -572,7 +589,8 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false); local_bh_enable(); - fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr); + fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, + ip6_frag_ecn(hdr)); if (fq == NULL) { pr_debug("Can't find and can't create new queue\n"); goto ret_orig; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1033d2b1d81e..e51bd1a58264 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -386,6 +386,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (dst) dst->ops->redirect(dst, sk, skb); + goto out; } if (type == ICMPV6_PKT_TOOBIG) { diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 9bf6a74a71d2..4770d515c2c8 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -49,8 +49,11 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) sizeof(top_iph->flow_lbl)); top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family); - dsfield = XFRM_MODE_SKB_CB(skb)->tos; - dsfield = INET_ECN_encapsulate(dsfield, dsfield); + if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) + dsfield = 0; + else + dsfield = XFRM_MODE_SKB_CB(skb)->tos; + dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos); if (x->props.flags & XFRM_STATE_NOECN) dsfield &= ~INET_ECN_MASK; ipv6_change_dsfield(top_iph, 0, dsfield); diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index d28e7f014cc6..0578d4fa00a9 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -305,8 +305,7 @@ static void irda_connect_response(struct irda_sock *self) IRDA_DEBUG(2, "%s()\n", __func__); - skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, - GFP_ATOMIC); + skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, GFP_KERNEL); if (skb == NULL) { IRDA_DEBUG(0, "%s() Unable to allocate sk_buff!\n", __func__); @@ -1120,7 +1119,7 @@ static int irda_create(struct net *net, struct socket *sock, int protocol, } /* Allocate networking socket */ - sk = sk_alloc(net, PF_IRDA, GFP_ATOMIC, &irda_proto); + sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto); if (sk == NULL) return -ENOMEM; @@ -1386,6 +1385,8 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, IRDA_DEBUG(4, "%s()\n", __func__); + msg->msg_namelen = 0; + skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); if (!skb) diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c index 52079f19bbbe..b797daac063c 100644 --- a/net/irda/ircomm/ircomm_core.c +++ b/net/irda/ircomm/ircomm_core.c @@ -117,7 +117,7 @@ struct ircomm_cb *ircomm_open(notify_t *notify, __u8 service_type, int line) IRDA_ASSERT(ircomm != NULL, return NULL;); - self = kzalloc(sizeof(struct ircomm_cb), GFP_ATOMIC); + self = kzalloc(sizeof(struct ircomm_cb), GFP_KERNEL); if (self == NULL) return NULL; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index a7d11ffe4284..e165e8dc962e 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1328,6 +1328,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct sk_buff *skb, *rskb, *cskb; int err = 0; + msg->msg_namelen = 0; + if ((sk->sk_state == IUCV_DISCONN) && skb_queue_empty(&iucv->backlog_skb_q) && skb_queue_empty(&sk->sk_receive_queue) && @@ -1461,7 +1463,8 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, return iucv_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP; diff --git a/net/key/af_key.c b/net/key/af_key.c index 8555f331ea60..5b1e5af25713 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2693,6 +2693,7 @@ static int key_notify_policy_flush(const struct km_event *c) hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; + hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index c74f5a91ff6a..b8a6039314e8 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -690,6 +690,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, lsa->l2tp_addr = ipv6_hdr(skb)->saddr; lsa->l2tp_flowinfo = 0; lsa->l2tp_scope_id = 0; + lsa->l2tp_conn_id = 0; if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL) lsa->l2tp_scope_id = IP6CB(skb)->iif; } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 88709882c464..48aaa89253e0 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -720,6 +720,8 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, int target; /* Read at least this many bytes */ long timeo; + msg->msg_namelen = 0; + lock_sock(sk); copied = -ENOTCONN; if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN)) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1d1ddabd89ca..c50c19402588 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -175,7 +175,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, * add it to the device after the station. */ if (!sta || !test_sta_flag(sta, WLAN_STA_ASSOC)) { - ieee80211_key_free(sdata->local, key); + ieee80211_key_free_unused(key); err = -ENOENT; goto out_unlock; } @@ -214,8 +214,6 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, } err = ieee80211_key_link(key, sdata, sta); - if (err) - ieee80211_key_free(sdata->local, key); out_unlock: mutex_unlock(&sdata->local->sta_mtx); @@ -254,7 +252,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, goto out_unlock; } - __ieee80211_key_free(key, true); + ieee80211_key_free(key, true); ret = 0; out_unlock: @@ -445,12 +443,14 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct timespec uptime; + u64 packets = 0; + int ac; sinfo->generation = sdata->local->sta_generation; sinfo->filled = STATION_INFO_INACTIVE_TIME | - STATION_INFO_RX_BYTES | - STATION_INFO_TX_BYTES | + STATION_INFO_RX_BYTES64 | + STATION_INFO_TX_BYTES64 | STATION_INFO_RX_PACKETS | STATION_INFO_TX_PACKETS | STATION_INFO_TX_RETRIES | @@ -467,10 +467,14 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->connected_time = uptime.tv_sec - sta->last_connected; sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); + sinfo->tx_bytes = 0; + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + sinfo->tx_bytes += sta->tx_bytes[ac]; + packets += sta->tx_packets[ac]; + } + sinfo->tx_packets = packets; sinfo->rx_bytes = sta->rx_bytes; - sinfo->tx_bytes = sta->tx_bytes; sinfo->rx_packets = sta->rx_packets; - sinfo->tx_packets = sta->tx_packets; sinfo->tx_retries = sta->tx_retry_count; sinfo->tx_failed = sta->tx_retry_failed; sinfo->rx_dropped_misc = sta->rx_dropped; @@ -598,8 +602,8 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy, data[i++] += sta->rx_fragments; \ data[i++] += sta->rx_dropped; \ \ - data[i++] += sta->tx_packets; \ - data[i++] += sta->tx_bytes; \ + data[i++] += sinfo.tx_packets; \ + data[i++] += sinfo.tx_bytes; \ data[i++] += sta->tx_fragments; \ data[i++] += sta->tx_filtered_count; \ data[i++] += sta->tx_retry_failed; \ @@ -621,13 +625,14 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy, if (!(sta && !WARN_ON(sta->sdata->dev != dev))) goto do_survey; + sinfo.filled = 0; + sta_set_sinfo(sta, &sinfo); + i = 0; ADD_STA_STATS(sta); data[i++] = sta->sta_state; - sinfo.filled = 0; - sta_set_sinfo(sta, &sinfo); if (sinfo.filled & STATION_INFO_TX_BITRATE) data[i] = 100000 * @@ -2636,7 +2641,7 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, list_del(&dep->list); mutex_unlock(&local->mtx); - ieee80211_roc_notify_destroy(dep); + ieee80211_roc_notify_destroy(dep, true); return 0; } @@ -2676,7 +2681,7 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, ieee80211_start_next_roc(local); mutex_unlock(&local->mtx); - ieee80211_roc_notify_destroy(found); + ieee80211_roc_notify_destroy(found, true); } else { /* work may be pending so use it all the time */ found->abort = true; @@ -2686,6 +2691,8 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, /* work will clean up etc */ flush_delayed_work(&found->work); + WARN_ON(!found->to_be_freed); + kfree(found); } return 0; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 78c0d90dd641..931be419ab5a 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -63,6 +63,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local, enum ieee80211_chanctx_mode mode) { struct ieee80211_chanctx *ctx; + u32 changed; int err; lockdep_assert_held(&local->chanctx_mtx); @@ -76,6 +77,13 @@ ieee80211_new_chanctx(struct ieee80211_local *local, ctx->conf.rx_chains_dynamic = 1; ctx->mode = mode; + /* acquire mutex to prevent idle from changing */ + mutex_lock(&local->mtx); + /* turn idle off *before* setting channel -- some drivers need that */ + changed = ieee80211_idle_off(local); + if (changed) + ieee80211_hw_config(local, changed); + if (!local->use_chanctx) { local->_oper_channel_type = cfg80211_get_chandef_type(chandef); @@ -85,14 +93,17 @@ ieee80211_new_chanctx(struct ieee80211_local *local, err = drv_add_chanctx(local, ctx); if (err) { kfree(ctx); - return ERR_PTR(err); + ctx = ERR_PTR(err); + + ieee80211_recalc_idle(local); + goto out; } } + /* and keep the mutex held until the new chanctx is on the list */ list_add_rcu(&ctx->list, &local->chanctx_list); - mutex_lock(&local->mtx); - ieee80211_recalc_idle(local); + out: mutex_unlock(&local->mtx); return ctx; diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index c3a3082b72e5..1521cabad3d6 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -295,7 +295,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata) char buf[50]; struct ieee80211_key *key; - if (!sdata->debugfs.dir) + if (!sdata->vif.debugfs_dir) return; lockdep_assert_held(&sdata->local->key_mtx); @@ -311,7 +311,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata) sprintf(buf, "../keys/%d", key->debugfs.cnt); sdata->debugfs.default_unicast_key = debugfs_create_symlink("default_unicast_key", - sdata->debugfs.dir, buf); + sdata->vif.debugfs_dir, buf); } if (sdata->debugfs.default_multicast_key) { @@ -325,7 +325,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata) sprintf(buf, "../keys/%d", key->debugfs.cnt); sdata->debugfs.default_multicast_key = debugfs_create_symlink("default_multicast_key", - sdata->debugfs.dir, buf); + sdata->vif.debugfs_dir, buf); } } @@ -334,7 +334,7 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata) char buf[50]; struct ieee80211_key *key; - if (!sdata->debugfs.dir) + if (!sdata->vif.debugfs_dir) return; key = key_mtx_dereference(sdata->local, @@ -343,7 +343,7 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata) sprintf(buf, "../keys/%d", key->debugfs.cnt); sdata->debugfs.default_mgmt_key = debugfs_create_symlink("default_mgmt_key", - sdata->debugfs.dir, buf); + sdata->vif.debugfs_dir, buf); } else ieee80211_debugfs_key_remove_mgmt_default(sdata); } diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 059bbb82e84f..ddb426867904 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -521,7 +521,7 @@ IEEE80211_IF_FILE(dot11MeshAwakeWindowDuration, #endif #define DEBUGFS_ADD_MODE(name, mode) \ - debugfs_create_file(#name, mode, sdata->debugfs.dir, \ + debugfs_create_file(#name, mode, sdata->vif.debugfs_dir, \ sdata, &name##_ops); #define DEBUGFS_ADD(name) DEBUGFS_ADD_MODE(name, 0400) @@ -577,7 +577,7 @@ static void add_mesh_files(struct ieee80211_sub_if_data *sdata) static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) { struct dentry *dir = debugfs_create_dir("mesh_stats", - sdata->debugfs.dir); + sdata->vif.debugfs_dir); #define MESHSTATS_ADD(name)\ debugfs_create_file(#name, 0400, dir, sdata, &name##_ops); @@ -594,7 +594,7 @@ static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) static void add_mesh_config(struct ieee80211_sub_if_data *sdata) { struct dentry *dir = debugfs_create_dir("mesh_config", - sdata->debugfs.dir); + sdata->vif.debugfs_dir); #define MESHPARAMS_ADD(name) \ debugfs_create_file(#name, 0600, dir, sdata, &name##_ops); @@ -631,7 +631,7 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata) static void add_files(struct ieee80211_sub_if_data *sdata) { - if (!sdata->debugfs.dir) + if (!sdata->vif.debugfs_dir) return; DEBUGFS_ADD(flags); @@ -673,21 +673,21 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata) char buf[10+IFNAMSIZ]; sprintf(buf, "netdev:%s", sdata->name); - sdata->debugfs.dir = debugfs_create_dir(buf, + sdata->vif.debugfs_dir = debugfs_create_dir(buf, sdata->local->hw.wiphy->debugfsdir); - if (sdata->debugfs.dir) + if (sdata->vif.debugfs_dir) sdata->debugfs.subdir_stations = debugfs_create_dir("stations", - sdata->debugfs.dir); + sdata->vif.debugfs_dir); add_files(sdata); } void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata) { - if (!sdata->debugfs.dir) + if (!sdata->vif.debugfs_dir) return; - debugfs_remove_recursive(sdata->debugfs.dir); - sdata->debugfs.dir = NULL; + debugfs_remove_recursive(sdata->vif.debugfs_dir); + sdata->vif.debugfs_dir = NULL; } void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) @@ -695,7 +695,7 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) struct dentry *dir; char buf[10 + IFNAMSIZ]; - dir = sdata->debugfs.dir; + dir = sdata->vif.debugfs_dir; if (!dir) return; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 832acea4a5cb..169664c122e2 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -241,6 +241,22 @@ static inline u64 drv_prepare_multicast(struct ieee80211_local *local, return ret; } +static inline void drv_set_multicast_list(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct netdev_hw_addr_list *mc_list) +{ + bool allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI; + + trace_drv_set_multicast_list(local, sdata, mc_list->count); + + check_sdata_in_driver(sdata); + + if (local->ops->set_multicast_list) + local->ops->set_multicast_list(&local->hw, &sdata->vif, + allmulti, mc_list); + trace_drv_return_void(local); +} + static inline void drv_configure_filter(struct ieee80211_local *local, unsigned int changed_flags, unsigned int *total_flags, @@ -531,43 +547,6 @@ static inline void drv_sta_remove_debugfs(struct ieee80211_local *local, local->ops->sta_remove_debugfs(&local->hw, &sdata->vif, sta, dir); } - -static inline -void drv_add_interface_debugfs(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) -{ - might_sleep(); - - check_sdata_in_driver(sdata); - - if (!local->ops->add_interface_debugfs) - return; - - local->ops->add_interface_debugfs(&local->hw, &sdata->vif, - sdata->debugfs.dir); -} - -static inline -void drv_remove_interface_debugfs(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) -{ - might_sleep(); - - check_sdata_in_driver(sdata); - - if (!local->ops->remove_interface_debugfs) - return; - - local->ops->remove_interface_debugfs(&local->hw, &sdata->vif, - sdata->debugfs.dir); -} -#else -static inline -void drv_add_interface_debugfs(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) {} -static inline -void drv_remove_interface_debugfs(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) {} #endif static inline __must_check @@ -741,13 +720,14 @@ static inline void drv_rfkill_poll(struct ieee80211_local *local) local->ops->rfkill_poll(&local->hw); } -static inline void drv_flush(struct ieee80211_local *local, bool drop) +static inline void drv_flush(struct ieee80211_local *local, + u32 queues, bool drop) { might_sleep(); - trace_drv_flush(local, drop); + trace_drv_flush(local, queues, drop); if (local->ops->flush) - local->ops->flush(&local->hw, drop); + local->ops->flush(&local->hw, queues, drop); trace_drv_return_void(local); } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f4433f081e77..0b09716d22ad 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -309,6 +309,7 @@ struct ieee80211_roc_work { struct ieee80211_channel *chan; bool started, abort, hw_begun, notified; + bool to_be_freed; unsigned long hw_start_time; @@ -758,7 +759,6 @@ struct ieee80211_sub_if_data { #ifdef CONFIG_MAC80211_DEBUGFS struct { - struct dentry *dir; struct dentry *subdir_stations; struct dentry *default_unicast_key; struct dentry *default_multicast_key; @@ -800,11 +800,6 @@ enum sdata_queue_type { enum { IEEE80211_RX_MSG = 1, IEEE80211_TX_STATUS_MSG = 2, - IEEE80211_EOSP_MSG = 3, -}; - -struct skb_eosp_msg_data { - u8 sta[ETH_ALEN], iface[ETH_ALEN]; }; enum queue_stop_reason { @@ -815,6 +810,7 @@ enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_SUSPEND, IEEE80211_QUEUE_STOP_REASON_SKB_ADD, IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL, + IEEE80211_QUEUE_STOP_REASON_FLUSH, }; #ifdef CONFIG_MAC80211_LEDS @@ -1335,7 +1331,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local); void ieee80211_roc_setup(struct ieee80211_local *local); void ieee80211_start_next_roc(struct ieee80211_local *local); void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata); -void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc); +void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free); void ieee80211_sw_roc_work(struct work_struct *work); void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc); @@ -1349,6 +1345,7 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, enum nl80211_iftype type); void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata); void ieee80211_remove_interfaces(struct ieee80211_local *local); +u32 ieee80211_idle_off(struct ieee80211_local *local); void ieee80211_recalc_idle(struct ieee80211_local *local); void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata, const int offset); @@ -1528,8 +1525,10 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr, bool ack); void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, + unsigned long queues, enum queue_stop_reason reason); void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, + unsigned long queues, enum queue_stop_reason reason); void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, enum queue_stop_reason reason); @@ -1546,6 +1545,8 @@ static inline void ieee80211_add_pending_skbs(struct ieee80211_local *local, { ieee80211_add_pending_skbs_fn(local, skbs, NULL, NULL); } +void ieee80211_flush_queues(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata); void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index d85282f64405..69aaba79a9f7 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -78,7 +78,7 @@ void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER); } -static u32 ieee80211_idle_off(struct ieee80211_local *local) +u32 ieee80211_idle_off(struct ieee80211_local *local) { if (!(local->hw.conf.flags & IEEE80211_CONF_IDLE)) return 0; @@ -92,7 +92,7 @@ static u32 ieee80211_idle_on(struct ieee80211_local *local) if (local->hw.conf.flags & IEEE80211_CONF_IDLE) return 0; - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); local->hw.conf.flags |= IEEE80211_CONF_IDLE; return IEEE80211_CONF_CHANGE_IDLE; @@ -349,21 +349,19 @@ static void ieee80211_set_default_queues(struct ieee80211_sub_if_data *sdata) static int ieee80211_add_virtual_monitor(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; - int ret = 0; + int ret; if (!(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF)) return 0; - mutex_lock(&local->iflist_mtx); + ASSERT_RTNL(); if (local->monitor_sdata) - goto out_unlock; + return 0; sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size, GFP_KERNEL); - if (!sdata) { - ret = -ENOMEM; - goto out_unlock; - } + if (!sdata) + return -ENOMEM; /* set up data */ sdata->local = local; @@ -377,13 +375,13 @@ static int ieee80211_add_virtual_monitor(struct ieee80211_local *local) if (WARN_ON(ret)) { /* ok .. stupid driver, it asked for this! */ kfree(sdata); - goto out_unlock; + return ret; } ret = ieee80211_check_queues(sdata); if (ret) { kfree(sdata); - goto out_unlock; + return ret; } ret = ieee80211_vif_use_channel(sdata, &local->monitor_chandef, @@ -391,13 +389,14 @@ static int ieee80211_add_virtual_monitor(struct ieee80211_local *local) if (ret) { drv_remove_interface(local, sdata); kfree(sdata); - goto out_unlock; + return ret; } + mutex_lock(&local->iflist_mtx); rcu_assign_pointer(local->monitor_sdata, sdata); - out_unlock: mutex_unlock(&local->iflist_mtx); - return ret; + + return 0; } static void ieee80211_del_virtual_monitor(struct ieee80211_local *local) @@ -407,14 +406,20 @@ static void ieee80211_del_virtual_monitor(struct ieee80211_local *local) if (!(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF)) return; + ASSERT_RTNL(); + mutex_lock(&local->iflist_mtx); sdata = rcu_dereference_protected(local->monitor_sdata, lockdep_is_held(&local->iflist_mtx)); - if (!sdata) - goto out_unlock; + if (!sdata) { + mutex_unlock(&local->iflist_mtx); + return; + } rcu_assign_pointer(local->monitor_sdata, NULL); + mutex_unlock(&local->iflist_mtx); + synchronize_net(); ieee80211_vif_release_channel(sdata); @@ -422,8 +427,6 @@ static void ieee80211_del_virtual_monitor(struct ieee80211_local *local) drv_remove_interface(local, sdata); kfree(sdata); - out_unlock: - mutex_unlock(&local->iflist_mtx); } /* @@ -557,8 +560,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) goto err_del_interface; } - drv_add_interface_debugfs(local, sdata); - if (sdata->vif.type == NL80211_IFTYPE_AP) { local->fif_pspoll++; local->fif_probe_req++; @@ -846,8 +847,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_AP: skb_queue_purge(&sdata->skb_queue); - drv_remove_interface_debugfs(local, sdata); - if (going_down) drv_remove_interface(local, sdata); } @@ -919,6 +918,17 @@ static void ieee80211_set_multicast_list(struct net_device *dev) atomic_dec(&local->iff_promiscs); sdata->flags ^= IEEE80211_SDATA_PROMISC; } + + /* + * TODO: If somebody needs this on AP interfaces, + * it can be enabled easily but multicast + * addresses from VLANs need to be synced. + */ + if (sdata->vif.type != NL80211_IFTYPE_MONITOR && + sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_AP) + drv_set_multicast_list(local, sdata, &dev->mc); + spin_lock_bh(&local->filter_lock); __hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len); spin_unlock_bh(&local->filter_lock); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 99e9f6ae6a54..67059b88fea5 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -248,11 +248,11 @@ void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, } -static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, - struct sta_info *sta, - bool pairwise, - struct ieee80211_key *old, - struct ieee80211_key *new) +static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + bool pairwise, + struct ieee80211_key *old, + struct ieee80211_key *new) { int idx; bool defunikey, defmultikey, defmgmtkey; @@ -397,25 +397,21 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, return key; } +static void ieee80211_key_free_common(struct ieee80211_key *key) +{ + if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP) + ieee80211_aes_key_free(key->u.ccmp.tfm); + if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC) + ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); + kfree(key); +} + static void __ieee80211_key_destroy(struct ieee80211_key *key, bool delay_tailroom) { - if (!key) - return; - - /* - * Synchronize so the TX path can no longer be using - * this key before we free/remove it. - */ - synchronize_net(); - if (key->local) ieee80211_key_disable_hw_accel(key); - if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP) - ieee80211_aes_key_free(key->u.ccmp.tfm); - if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC) - ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); if (key->local) { struct ieee80211_sub_if_data *sdata = key->sdata; @@ -431,7 +427,28 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key, } } - kfree(key); + ieee80211_key_free_common(key); +} + +static void ieee80211_key_destroy(struct ieee80211_key *key, + bool delay_tailroom) +{ + if (!key) + return; + + /* + * Synchronize so the TX path can no longer be using + * this key before we free/remove it. + */ + synchronize_net(); + + __ieee80211_key_destroy(key, delay_tailroom); +} + +void ieee80211_key_free_unused(struct ieee80211_key *key) +{ + WARN_ON(key->sdata || key->local); + ieee80211_key_free_common(key); } int ieee80211_key_link(struct ieee80211_key *key, @@ -462,19 +479,22 @@ int ieee80211_key_link(struct ieee80211_key *key, increment_tailroom_need_count(sdata); - __ieee80211_key_replace(sdata, sta, pairwise, old_key, key); - __ieee80211_key_destroy(old_key, true); + ieee80211_key_replace(sdata, sta, pairwise, old_key, key); + ieee80211_key_destroy(old_key, true); ieee80211_debugfs_key_add(key); ret = ieee80211_key_enable_hw_accel(key); + if (ret) + ieee80211_key_free(key, true); + mutex_unlock(&sdata->local->key_mtx); return ret; } -void __ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom) +void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom) { if (!key) return; @@ -483,18 +503,10 @@ void __ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom) * Replace key with nothingness if it was ever used. */ if (key->sdata) - __ieee80211_key_replace(key->sdata, key->sta, + ieee80211_key_replace(key->sdata, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); - __ieee80211_key_destroy(key, delay_tailroom); -} - -void ieee80211_key_free(struct ieee80211_local *local, - struct ieee80211_key *key) -{ - mutex_lock(&local->key_mtx); - __ieee80211_key_free(key, true); - mutex_unlock(&local->key_mtx); + ieee80211_key_destroy(key, delay_tailroom); } void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) @@ -554,6 +566,7 @@ EXPORT_SYMBOL(ieee80211_iter_keys); void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) { struct ieee80211_key *key, *tmp; + LIST_HEAD(keys); cancel_delayed_work_sync(&sdata->dec_tailroom_needed_wk); @@ -565,17 +578,65 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) ieee80211_debugfs_key_remove_mgmt_default(sdata); - list_for_each_entry_safe(key, tmp, &sdata->key_list, list) - __ieee80211_key_free(key, false); + list_for_each_entry_safe(key, tmp, &sdata->key_list, list) { + ieee80211_key_replace(key->sdata, key->sta, + key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, + key, NULL); + list_add_tail(&key->list, &keys); + } ieee80211_debugfs_key_update_default(sdata); + if (!list_empty(&keys)) { + synchronize_net(); + list_for_each_entry_safe(key, tmp, &keys, list) + __ieee80211_key_destroy(key, false); + } + WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt || sdata->crypto_tx_tailroom_pending_dec); mutex_unlock(&sdata->local->key_mtx); } +void ieee80211_free_sta_keys(struct ieee80211_local *local, + struct sta_info *sta) +{ + struct ieee80211_key *key, *tmp; + LIST_HEAD(keys); + int i; + + mutex_lock(&local->key_mtx); + for (i = 0; i < NUM_DEFAULT_KEYS; i++) { + key = key_mtx_dereference(local, sta->gtk[i]); + if (!key) + continue; + ieee80211_key_replace(key->sdata, key->sta, + key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, + key, NULL); + list_add(&key->list, &keys); + } + + key = key_mtx_dereference(local, sta->ptk); + if (key) { + ieee80211_key_replace(key->sdata, key->sta, + key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, + key, NULL); + list_add(&key->list, &keys); + } + + /* + * NB: the station code relies on this being + * done even if there aren't any keys + */ + synchronize_net(); + + list_for_each_entry_safe(key, tmp, &keys, list) + __ieee80211_key_destroy(key, true); + + mutex_unlock(&local->key_mtx); +} + void ieee80211_delayed_tailroom_dec(struct work_struct *wk) { struct ieee80211_sub_if_data *sdata; diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 2a682d81cee9..e8de3e6d7804 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -129,19 +129,20 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, size_t seq_len, const u8 *seq); /* * Insert a key into data structures (sdata, sta if necessary) - * to make it used, free old key. + * to make it used, free old key. On failure, also free the new key. */ -int __must_check ieee80211_key_link(struct ieee80211_key *key, - struct ieee80211_sub_if_data *sdata, - struct sta_info *sta); -void __ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom); -void ieee80211_key_free(struct ieee80211_local *local, - struct ieee80211_key *key); +int ieee80211_key_link(struct ieee80211_key *key, + struct ieee80211_sub_if_data *sdata, + struct sta_info *sta); +void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom); +void ieee80211_key_free_unused(struct ieee80211_key *key); void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx, bool uni, bool multi); void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx); void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata); +void ieee80211_free_sta_keys(struct ieee80211_local *local, + struct sta_info *sta); void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); #define key_mtx_dereference(local, ref) \ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 5a53aa5ede80..c6f81ecc36a1 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -100,7 +100,6 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local) int power; enum nl80211_channel_type channel_type; u32 offchannel_flag; - bool scanning = false; offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; if (local->scan_channel) { @@ -147,9 +146,6 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local) changed |= IEEE80211_CONF_CHANGE_SMPS; } - scanning = test_bit(SCAN_SW_SCANNING, &local->scanning) || - test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning) || - test_bit(SCAN_HW_SCANNING, &local->scanning); power = chan->max_power; rcu_read_lock(); @@ -226,8 +222,6 @@ u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) static void ieee80211_tasklet_handler(unsigned long data) { struct ieee80211_local *local = (struct ieee80211_local *) data; - struct sta_info *sta, *tmp; - struct skb_eosp_msg_data *eosp_data; struct sk_buff *skb; while ((skb = skb_dequeue(&local->skb_queue)) || @@ -243,18 +237,6 @@ static void ieee80211_tasklet_handler(unsigned long data) skb->pkt_type = 0; ieee80211_tx_status(&local->hw, skb); break; - case IEEE80211_EOSP_MSG: - eosp_data = (void *)skb->cb; - for_each_sta_info(local, eosp_data->sta, sta, tmp) { - /* skip wrong virtual interface */ - if (memcmp(eosp_data->iface, - sta->sdata->vif.addr, ETH_ALEN)) - continue; - clear_sta_flag(sta, WLAN_STA_SP); - break; - } - dev_kfree_skb(skb); - break; default: WARN(1, "mac80211: Packet is of unknown type %d\n", skb->pkt_type); @@ -295,8 +277,8 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw) "Hardware restart was requested\n"); /* use this reason, ieee80211_reconfig will unblock it */ - ieee80211_stop_queues_by_reason(hw, - IEEE80211_QUEUE_STOP_REASON_SUSPEND); + ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_SUSPEND); /* * Stop all Rx during the reconfig. We don't want state changes diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 5ac017f3fcd2..123a300cef57 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -699,10 +699,8 @@ out_free: static int ieee80211_mesh_rebuild_beacon(struct ieee80211_if_mesh *ifmsh) { - struct ieee80211_sub_if_data *sdata; struct beacon_data *old_bcn; int ret; - sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); mutex_lock(&ifmsh->mtx); @@ -833,9 +831,8 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *hdr; struct ieee802_11_elems elems; size_t baselen; - u8 *pos, *end; + u8 *pos; - end = ((u8 *) mgmt) + len; pos = mgmt->u.probe_req.variable; baselen = (u8 *) pos - (u8 *) mgmt; if (baselen > len) @@ -1007,7 +1004,8 @@ void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) - if (ieee80211_vif_is_mesh(&sdata->vif)) + if (ieee80211_vif_is_mesh(&sdata->vif) && + ieee80211_sdata_running(sdata)) ieee80211_queue_work(&local->hw, &sdata->work); rcu_read_unlock(); } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index fdc06e381c10..e06dbbf8cb4c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1009,6 +1009,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) /* XXX: wait for a beacon first? */ ieee80211_wake_queues_by_reason(&sdata->local->hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); out: ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; @@ -1108,6 +1109,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, if (sw_elem->mode) ieee80211_stop_queues_by_reason(&sdata->local->hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); if (sdata->local->ops->channel_switch) { @@ -1375,6 +1377,7 @@ void ieee80211_dynamic_ps_disable_work(struct work_struct *work) } ieee80211_wake_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_PS); } @@ -1436,7 +1439,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) else { ieee80211_send_nullfunc(local, sdata, 1); /* Flush to get the tx status of nullfunc frame */ - drv_flush(local, false); + ieee80211_flush_queues(local, sdata); } } @@ -1767,7 +1770,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* flush out any pending frame (e.g. DELBA) before deauth/disassoc */ if (tx) - drv_flush(local, false); + ieee80211_flush_queues(local, sdata); /* deauthenticate/disassociate now */ if (tx || frame_buf) @@ -1776,7 +1779,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* flush out frame */ if (tx) - drv_flush(local, false); + ieee80211_flush_queues(local, sdata); /* clear bssid only after building the needed mgmt frames */ memset(ifmgd->bssid, 0, ETH_ALEN); @@ -1948,7 +1951,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms); run_again(ifmgd, ifmgd->probe_timeout); if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) - drv_flush(sdata->local, false); + ieee80211_flush_queues(sdata->local, sdata); } static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, @@ -2071,6 +2074,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) true, frame_buf); ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; ieee80211_wake_queues_by_reason(&sdata->local->hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); mutex_unlock(&ifmgd->mtx); @@ -3527,8 +3531,10 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) /* Restart STA timers */ rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) - ieee80211_restart_sta_timer(sdata); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (ieee80211_sdata_running(sdata)) + ieee80211_restart_sta_timer(sdata); + } rcu_read_unlock(); } diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index db547fceaeb9..cce795871ab1 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -118,9 +118,9 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) * Stop queues and transmit all frames queued by the driver * before sending nullfunc to enable powersave at the AP. */ - ieee80211_stop_queues_by_reason(&local->hw, + ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL); - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { @@ -181,7 +181,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local) } mutex_unlock(&local->iflist_mtx); - ieee80211_wake_queues_by_reason(&local->hw, + ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL); } @@ -297,10 +297,13 @@ void ieee80211_start_next_roc(struct ieee80211_local *local) } } -void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc) +void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free) { struct ieee80211_roc_work *dep, *tmp; + if (WARN_ON(roc->to_be_freed)) + return; + /* was never transmitted */ if (roc->frame) { cfg80211_mgmt_tx_status(&roc->sdata->wdev, @@ -316,9 +319,12 @@ void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc) GFP_KERNEL); list_for_each_entry_safe(dep, tmp, &roc->dependents, list) - ieee80211_roc_notify_destroy(dep); + ieee80211_roc_notify_destroy(dep, true); - kfree(roc); + if (free) + kfree(roc); + else + roc->to_be_freed = true; } void ieee80211_sw_roc_work(struct work_struct *work) @@ -331,6 +337,9 @@ void ieee80211_sw_roc_work(struct work_struct *work) mutex_lock(&local->mtx); + if (roc->to_be_freed) + goto out_unlock; + if (roc->abort) goto finish; @@ -370,10 +379,10 @@ void ieee80211_sw_roc_work(struct work_struct *work) finish: list_del(&roc->list); started = roc->started; - ieee80211_roc_notify_destroy(roc); + ieee80211_roc_notify_destroy(roc, !roc->abort); if (started) { - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); local->tmp_channel = NULL; ieee80211_hw_config(local, 0); @@ -410,7 +419,7 @@ static void ieee80211_hw_roc_done(struct work_struct *work) list_del(&roc->list); - ieee80211_roc_notify_destroy(roc); + ieee80211_roc_notify_destroy(roc, true); /* if there's another roc, start it now */ ieee80211_start_next_roc(local); @@ -460,12 +469,14 @@ void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata) list_for_each_entry_safe(roc, tmp, &tmp_list, list) { if (local->ops->remain_on_channel) { list_del(&roc->list); - ieee80211_roc_notify_destroy(roc); + ieee80211_roc_notify_destroy(roc, true); } else { ieee80211_queue_delayed_work(&local->hw, &roc->work, 0); /* work will clean up etc */ flush_delayed_work(&roc->work); + WARN_ON(!roc->to_be_freed); + kfree(roc); } } diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index b471a67f224d..3d16f4e61743 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -30,12 +30,13 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) } ieee80211_stop_queues_by_reason(hw, - IEEE80211_QUEUE_STOP_REASON_SUSPEND); + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_SUSPEND); /* flush out all packets */ synchronize_net(); - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); local->quiescing = true; /* make quiescing visible to timers everywhere */ @@ -68,6 +69,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) mutex_unlock(&local->sta_mtx); } ieee80211_wake_queues_by_reason(hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_SUSPEND); return err; } else if (err > 0) { diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 749552bdcfe1..d2b264d1311d 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -202,14 +202,23 @@ minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate) struct minstrel_rate_stats *mr; unsigned int nsecs = 0; unsigned int tp; + unsigned int prob; mr = &mi->groups[group].rates[rate]; + prob = mr->probability; - if (mr->probability < MINSTREL_FRAC(1, 10)) { + if (prob < MINSTREL_FRAC(1, 10)) { mr->cur_tp = 0; return; } + /* + * For the throughput calculation, limit the probability value to 90% to + * account for collision related packet error rate fluctuation + */ + if (prob > MINSTREL_FRAC(9, 10)) + prob = MINSTREL_FRAC(9, 10); + if (group != MINSTREL_CCK_GROUP) nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); @@ -639,15 +648,18 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) /* * Sampling might add some overhead (RTS, no aggregation) * to the frame. Hence, don't use sampling for the currently - * used max TP rate. + * used rates. */ - if (sample_idx == mi->max_tp_rate) + if (sample_idx == mi->max_tp_rate || + sample_idx == mi->max_tp_rate2 || + sample_idx == mi->max_prob_rate) return -1; + /* - * When not using MRR, do not sample if the probability is already - * higher than 95% to avoid wasting airtime + * Do not sample if the probability is already higher than 95% + * to avoid wasting airtime. */ - if (!mp->has_mrr && (mr->probability > MINSTREL_FRAC(95, 100))) + if (mr->probability > MINSTREL_FRAC(95, 100)) return -1; /* diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5b4492af4e85..2528b5a4d6d4 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2666,7 +2666,19 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) memset(nskb->cb, 0, sizeof(nskb->cb)); - ieee80211_tx_skb(rx->sdata, nskb); + if (rx->sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(nskb); + + info->flags = IEEE80211_TX_CTL_TX_OFFCHAN | + IEEE80211_TX_INTFL_OFFCHAN_TX_OK | + IEEE80211_TX_CTL_NO_CCK_RATE; + if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) + info->hw_queue = + local->hw.offchannel_tx_hw_queue; + } + + __ieee80211_tx_skb_tid_band(rx->sdata, nskb, 7, + status->band); } dev_kfree_skb(rx->skb); return RX_QUEUED; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 43a45cf00e06..cb34cbbaa20c 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -153,7 +153,6 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) u8 *elements; struct ieee80211_channel *channel; size_t baselen; - bool beacon; struct ieee802_11_elems elems; if (skb->len < 24 || @@ -175,11 +174,9 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) elements = mgmt->u.probe_resp.variable; baselen = offsetof(struct ieee80211_mgmt, u.probe_resp.variable); - beacon = false; } else { baselen = offsetof(struct ieee80211_mgmt, u.beacon.variable); elements = mgmt->u.beacon.variable; - beacon = true; } if (baselen > skb->len) @@ -335,7 +332,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) ieee80211_offchannel_stop_vifs(local); /* ensure nullfunc is transmitted before leaving operating channel */ - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); ieee80211_configure_filter(local); @@ -671,7 +668,7 @@ static void ieee80211_scan_state_resume(struct ieee80211_local *local, ieee80211_offchannel_stop_vifs(local); if (local->ops->flush) { - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); *next_delay = 0; } else *next_delay = HZ / 10; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 3644ad79688a..11216bc13b27 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -556,6 +556,15 @@ static inline void __bss_tim_clear(u8 *tim, u16 id) tim[id / 8] &= ~(1 << (id % 8)); } +static inline bool __bss_tim_get(u8 *tim, u16 id) +{ + /* + * This format has been mandated by the IEEE specifications, + * so this line may not be changed to use the test_bit() format. + */ + return tim[id / 8] & (1 << (id % 8)); +} + static unsigned long ieee80211_tids_for_ac(int ac) { /* If we ever support TIDs > 7, this obviously needs to be adjusted */ @@ -636,6 +645,9 @@ void sta_info_recalc_tim(struct sta_info *sta) done: spin_lock_bh(&local->tim_lock); + if (indicate_tim == __bss_tim_get(ps->tim, id)) + goto out_unlock; + if (indicate_tim) __bss_tim_set(ps->tim, id); else @@ -647,6 +659,7 @@ void sta_info_recalc_tim(struct sta_info *sta) local->tim_in_locked_section = false; } +out_unlock: spin_unlock_bh(&local->tim_lock); } @@ -770,7 +783,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta) { struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; - int ret, i; + int ret; might_sleep(); @@ -797,14 +810,8 @@ int __must_check __sta_info_destroy(struct sta_info *sta) list_del_rcu(&sta->list); - mutex_lock(&local->key_mtx); - for (i = 0; i < NUM_DEFAULT_KEYS; i++) - __ieee80211_key_free(key_mtx_dereference(local, sta->gtk[i]), - true); - if (sta->ptk) - __ieee80211_key_free(key_mtx_dereference(local, sta->ptk), - true); - mutex_unlock(&local->key_mtx); + /* this always calls synchronize_net() */ + ieee80211_free_sta_keys(local, sta); sta->dead = true; @@ -1390,30 +1397,16 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_sta_block_awake); -void ieee80211_sta_eosp_irqsafe(struct ieee80211_sta *pubsta) +void ieee80211_sta_eosp(struct ieee80211_sta *pubsta) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_local *local = sta->local; - struct sk_buff *skb; - struct skb_eosp_msg_data *data; trace_api_eosp(local, pubsta); - skb = alloc_skb(0, GFP_ATOMIC); - if (!skb) { - /* too bad ... but race is better than loss */ - clear_sta_flag(sta, WLAN_STA_SP); - return; - } - - data = (void *)skb->cb; - memcpy(data->sta, pubsta->addr, ETH_ALEN); - memcpy(data->iface, sta->sdata->vif.addr, ETH_ALEN); - skb->pkt_type = IEEE80211_EOSP_MSG; - skb_queue_tail(&local->skb_queue, skb); - tasklet_schedule(&local->tasklet); + clear_sta_flag(sta, WLAN_STA_SP); } -EXPORT_SYMBOL(ieee80211_sta_eosp_irqsafe); +EXPORT_SYMBOL(ieee80211_sta_eosp); void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta, u8 tid, bool buffered) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index e5868c32d1a3..adc30045f99e 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -333,7 +333,8 @@ struct sta_info { unsigned long driver_buffered_tids; /* Updated from RX path only, no locking requirements */ - unsigned long rx_packets, rx_bytes; + unsigned long rx_packets; + u64 rx_bytes; unsigned long wep_weak_iv_count; unsigned long last_rx; long last_connected; @@ -353,9 +354,9 @@ struct sta_info { unsigned int fail_avg; /* Updated from TX path only, no locking requirements */ - unsigned long tx_packets; - unsigned long tx_bytes; - unsigned long tx_fragments; + u32 tx_fragments; + u64 tx_packets[IEEE80211_NUM_ACS]; + u64 tx_bytes[IEEE80211_NUM_ACS]; struct ieee80211_tx_rate last_tx_rate; int last_rx_rate_idx; u32 last_rx_rate_flag; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index e7db2b804e0c..c5899797a8d4 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -431,6 +431,30 @@ TRACE_EVENT(drv_prepare_multicast, ) ); +TRACE_EVENT(drv_set_multicast_list, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, int mc_count), + + TP_ARGS(local, sdata, mc_count), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(bool, allmulti) + __field(int, mc_count) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI; + __entry->mc_count = mc_count; + ), + + TP_printk( + LOCAL_PR_FMT " configure mc filter, count=%d, allmulti=%d", + LOCAL_PR_ARG, __entry->mc_count, __entry->allmulti + ) +); + TRACE_EVENT(drv_configure_filter, TP_PROTO(struct ieee80211_local *local, unsigned int changed_flags, @@ -940,23 +964,26 @@ TRACE_EVENT(drv_get_survey, ); TRACE_EVENT(drv_flush, - TP_PROTO(struct ieee80211_local *local, bool drop), + TP_PROTO(struct ieee80211_local *local, + u32 queues, bool drop), - TP_ARGS(local, drop), + TP_ARGS(local, queues, drop), TP_STRUCT__entry( LOCAL_ENTRY __field(bool, drop) + __field(u32, queues) ), TP_fast_assign( LOCAL_ASSIGN; __entry->drop = drop; + __entry->queues = queues; ), TP_printk( - LOCAL_PR_FMT " drop:%d", - LOCAL_PR_ARG, __entry->drop + LOCAL_PR_FMT " queues:0x%x drop:%d", + LOCAL_PR_ARG, __entry->queues, __entry->drop ) ); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 4e8a86163fc7..9e67cc97b87b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -233,6 +233,7 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) if (local->hw.conf.flags & IEEE80211_CONF_PS) { ieee80211_stop_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_PS); ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; ieee80211_queue_work(&local->hw, @@ -991,15 +992,18 @@ static ieee80211_tx_result debug_noinline ieee80211_tx_h_stats(struct ieee80211_tx_data *tx) { struct sk_buff *skb; + int ac = -1; if (!tx->sta) return TX_CONTINUE; - tx->sta->tx_packets++; skb_queue_walk(&tx->skbs, skb) { + ac = skb_get_queue_mapping(skb); tx->sta->tx_fragments++; - tx->sta->tx_bytes += skb->len; + tx->sta->tx_bytes[ac] += skb->len; } + if (ac >= 0) + tx->sta->tx_packets[ac]++; return TX_CONTINUE; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b7a856e3281b..a7368870c8ee 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -453,7 +453,8 @@ void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, } void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, - enum queue_stop_reason reason) + unsigned long queues, + enum queue_stop_reason reason) { struct ieee80211_local *local = hw_to_local(hw); unsigned long flags; @@ -461,7 +462,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - for (i = 0; i < hw->queues; i++) + for_each_set_bit(i, &queues, hw->queues) __ieee80211_stop_queue(hw, i, reason); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); @@ -469,7 +470,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, void ieee80211_stop_queues(struct ieee80211_hw *hw) { - ieee80211_stop_queues_by_reason(hw, + ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_DRIVER); } EXPORT_SYMBOL(ieee80211_stop_queues); @@ -491,6 +492,7 @@ int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue) EXPORT_SYMBOL(ieee80211_queue_stopped); void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, + unsigned long queues, enum queue_stop_reason reason) { struct ieee80211_local *local = hw_to_local(hw); @@ -499,7 +501,7 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - for (i = 0; i < hw->queues; i++) + for_each_set_bit(i, &queues, hw->queues) __ieee80211_wake_queue(hw, i, reason); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); @@ -507,10 +509,42 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, void ieee80211_wake_queues(struct ieee80211_hw *hw) { - ieee80211_wake_queues_by_reason(hw, IEEE80211_QUEUE_STOP_REASON_DRIVER); + ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_DRIVER); } EXPORT_SYMBOL(ieee80211_wake_queues); +void ieee80211_flush_queues(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + u32 queues; + + if (!local->ops->flush) + return; + + if (sdata && local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) { + int ac; + + queues = 0; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + queues |= BIT(sdata->vif.hw_queue[ac]); + if (sdata->vif.cab_queue != IEEE80211_INVAL_HW_QUEUE) + queues |= BIT(sdata->vif.cab_queue); + } else { + /* all queues */ + queues = BIT(local->hw.queues) - 1; + } + + ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_FLUSH); + + drv_flush(local, queues, false); + + ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_FLUSH); +} + void ieee80211_iterate_active_interfaces( struct ieee80211_hw *hw, u32 iter_flags, void (*iterator)(void *data, u8 *mac, @@ -1651,8 +1685,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_unlock(&local->sta_mtx); } - ieee80211_wake_queues_by_reason(hw, - IEEE80211_QUEUE_STOP_REASON_SUSPEND); + ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_SUSPEND); /* * If this is for hw restart things are still running. diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h index 21fa386f4675..5c9e021994ba 100644 --- a/net/mac802154/mac802154.h +++ b/net/mac802154/mac802154.h @@ -88,8 +88,6 @@ struct mac802154_sub_if_data { #define mac802154_to_priv(_hw) container_of(_hw, struct mac802154_priv, hw) -#define MAC802154_MAX_XMIT_ATTEMPTS 3 - #define MAC802154_CHAN_NONE (~(u8)0) /* No channel is assigned */ extern struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced; diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c index f03e55f2ebf0..8ded97cf1c33 100644 --- a/net/mac802154/mib.c +++ b/net/mac802154/mib.c @@ -176,9 +176,15 @@ static void phy_chan_notify(struct work_struct *work) struct mac802154_sub_if_data *priv = netdev_priv(nw->dev); int res; + mutex_lock(&priv->hw->phy->pib_lock); res = hw->ops->set_channel(&hw->hw, priv->page, priv->chan); if (res) pr_debug("set_channel failed\n"); + else { + priv->hw->phy->current_channel = priv->chan; + priv->hw->phy->current_page = priv->page; + } + mutex_unlock(&priv->hw->phy->pib_lock); kfree(nw); } @@ -195,8 +201,11 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan) priv->chan = chan; spin_unlock_bh(&priv->mib_lock); + mutex_lock(&priv->hw->phy->pib_lock); if (priv->hw->phy->current_channel != priv->chan || priv->hw->phy->current_page != priv->page) { + mutex_unlock(&priv->hw->phy->pib_lock); + work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) return; @@ -204,5 +213,6 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan) INIT_WORK(&work->work, phy_chan_notify); work->dev = dev; queue_work(priv->hw->dev_workqueue, &work->work); - } + } else + mutex_unlock(&priv->hw->phy->pib_lock); } diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index 4e09d070995a..6d1647399d4f 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -25,6 +25,7 @@ #include <linux/if_arp.h> #include <linux/crc-ccitt.h> +#include <net/ieee802154_netdev.h> #include <net/mac802154.h> #include <net/wpan-phy.h> @@ -39,12 +40,12 @@ struct xmit_work { struct mac802154_priv *priv; u8 chan; u8 page; - u8 xmit_attempts; }; static void mac802154_xmit_worker(struct work_struct *work) { struct xmit_work *xw = container_of(work, struct xmit_work, work); + struct mac802154_sub_if_data *sdata; int res; mutex_lock(&xw->priv->phy->pib_lock); @@ -57,21 +58,23 @@ static void mac802154_xmit_worker(struct work_struct *work) pr_debug("set_channel failed\n"); goto out; } + + xw->priv->phy->current_channel = xw->chan; + xw->priv->phy->current_page = xw->page; } res = xw->priv->ops->xmit(&xw->priv->hw, xw->skb); + if (res) + pr_debug("transmission failed\n"); out: mutex_unlock(&xw->priv->phy->pib_lock); - if (res) { - if (xw->xmit_attempts++ < MAC802154_MAX_XMIT_ATTEMPTS) { - queue_work(xw->priv->dev_workqueue, &xw->work); - return; - } else - pr_debug("transmission failed for %d times", - MAC802154_MAX_XMIT_ATTEMPTS); - } + /* Restart the netif queue on each sub_if_data object. */ + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &xw->priv->slaves, list) + netif_wake_queue(sdata->dev); + rcu_read_unlock(); dev_kfree_skb(xw->skb); @@ -82,6 +85,7 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb, u8 page, u8 chan) { struct xmit_work *work; + struct mac802154_sub_if_data *sdata; if (!(priv->phy->channels_supported[page] & (1 << chan))) { WARN_ON(1); @@ -109,12 +113,17 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb, return NETDEV_TX_BUSY; } + /* Stop the netif queue on each sub_if_data object. */ + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &priv->slaves, list) + netif_stop_queue(sdata->dev); + rcu_read_unlock(); + INIT_WORK(&work->work, mac802154_xmit_worker); work->skb = skb; work->priv = priv; work->page = page; work->chan = chan; - work->xmit_attempts = 0; queue_work(priv->dev_workqueue, &work->work); diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index 7d3f6594ed4f..2ca2f4dceab7 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -360,7 +360,7 @@ void mac802154_wpan_setup(struct net_device *dev) dev->header_ops = &mac802154_header_ops; dev->needed_tailroom = 2; /* FCS */ dev->mtu = IEEE802154_MTU; - dev->tx_queue_len = 10; + dev->tx_queue_len = 300; dev->type = ARPHRD_IEEE802154; dev->flags = IFF_NOARP | IFF_BROADCAST; dev->watchdog_timeo = 0; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a9c488b6c50d..7d97302f7c07 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -276,10 +276,30 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); EXPORT_SYMBOL(nf_nat_decode_session_hook); #endif +static int __net_init netfilter_net_init(struct net *net) +{ #ifdef CONFIG_PROC_FS -struct proc_dir_entry *proc_net_netfilter; -EXPORT_SYMBOL(proc_net_netfilter); + net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", + net->proc_net); + if (!net->nf.proc_netfilter) { + if (!net_eq(net, &init_net)) + pr_err("cannot create netfilter proc entry"); + + return -ENOMEM; + } #endif + return 0; +} + +static void __net_exit netfilter_net_exit(struct net *net) +{ + remove_proc_entry("netfilter", net->proc_net); +} + +static struct pernet_operations netfilter_net_ops = { + .init = netfilter_net_init, + .exit = netfilter_net_exit, +}; void __init netfilter_init(void) { @@ -289,11 +309,8 @@ void __init netfilter_init(void) INIT_LIST_HEAD(&nf_hooks[i][h]); } -#ifdef CONFIG_PROC_FS - proc_net_netfilter = proc_mkdir("netfilter", init_net.proc_net); - if (!proc_net_netfilter) + if (register_pernet_subsys(&netfilter_net_ops) < 0) panic("cannot create netfilter proc entry"); -#endif if (netfilter_log_init() < 0) panic("cannot initialize nf_log"); diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 0b779d7df881..dfd7b65b3d2a 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -58,6 +58,18 @@ static inline void ip_vs_app_put(struct ip_vs_app *app) module_put(app->module); } +static void ip_vs_app_inc_destroy(struct ip_vs_app *inc) +{ + kfree(inc->timeout_table); + kfree(inc); +} + +static void ip_vs_app_inc_rcu_free(struct rcu_head *head) +{ + struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head); + + ip_vs_app_inc_destroy(inc); +} /* * Allocate/initialize app incarnation and register it in proto apps. @@ -106,8 +118,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, return 0; out: - kfree(inc->timeout_table); - kfree(inc); + ip_vs_app_inc_destroy(inc); return ret; } @@ -131,8 +142,7 @@ ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc) list_del(&inc->a_list); - kfree(inc->timeout_table); - kfree(inc); + call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free); } @@ -144,9 +154,9 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc) { int result; - atomic_inc(&inc->usecnt); - if (unlikely((result = ip_vs_app_get(inc->app)) != 1)) - atomic_dec(&inc->usecnt); + result = ip_vs_app_get(inc->app); + if (result) + atomic_inc(&inc->usecnt); return result; } @@ -156,8 +166,8 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc) */ void ip_vs_app_inc_put(struct ip_vs_app *inc) { - ip_vs_app_put(inc->app); atomic_dec(&inc->usecnt); + ip_vs_app_put(inc->app); } @@ -218,6 +228,7 @@ out_unlock: /* * ip_vs_app unregistration routine * We are sure there are no app incarnations attached to services + * Caller should use synchronize_rcu() or rcu_barrier() */ void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) { @@ -341,14 +352,14 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq, unsigned int flag, __u32 seq, int diff) { /* spinlock is to keep updating cp->flags atomic */ - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); if (!(cp->flags & flag) || after(seq, vseq->init_seq)) { vseq->previous_delta = vseq->delta; vseq->delta += diff; vseq->init_seq = seq; cp->flags |= flag; } - spin_unlock(&cp->lock); + spin_unlock_bh(&cp->lock); } static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 704e514e02ab..de6475894a39 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -79,51 +79,21 @@ static unsigned int ip_vs_conn_rnd __read_mostly; struct ip_vs_aligned_lock { - rwlock_t l; + spinlock_t l; } __attribute__((__aligned__(SMP_CACHE_BYTES))); /* lock array for conn table */ static struct ip_vs_aligned_lock __ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned; -static inline void ct_read_lock(unsigned int key) -{ - read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_read_unlock(unsigned int key) -{ - read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_write_lock(unsigned int key) -{ - write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_write_unlock(unsigned int key) -{ - write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_read_lock_bh(unsigned int key) -{ - read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_read_unlock_bh(unsigned int key) -{ - read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - static inline void ct_write_lock_bh(unsigned int key) { - write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); + spin_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } static inline void ct_write_unlock_bh(unsigned int key) { - write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); + spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } @@ -197,13 +167,13 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) /* Hash by protocol, client address and port */ hash = ip_vs_conn_hashkey_conn(cp); - ct_write_lock(hash); + ct_write_lock_bh(hash); spin_lock(&cp->lock); if (!(cp->flags & IP_VS_CONN_F_HASHED)) { - hlist_add_head(&cp->c_list, &ip_vs_conn_tab[hash]); cp->flags |= IP_VS_CONN_F_HASHED; atomic_inc(&cp->refcnt); + hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]); ret = 1; } else { pr_err("%s(): request for already hashed, called from %pF\n", @@ -212,7 +182,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) } spin_unlock(&cp->lock); - ct_write_unlock(hash); + ct_write_unlock_bh(hash); return ret; } @@ -220,7 +190,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) /* * UNhashes ip_vs_conn from ip_vs_conn_tab. - * returns bool success. + * returns bool success. Caller should hold conn reference. */ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) { @@ -230,11 +200,11 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) /* unhash it and decrease its reference counter */ hash = ip_vs_conn_hashkey_conn(cp); - ct_write_lock(hash); + ct_write_lock_bh(hash); spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_HASHED) { - hlist_del(&cp->c_list); + hlist_del_rcu(&cp->c_list); cp->flags &= ~IP_VS_CONN_F_HASHED; atomic_dec(&cp->refcnt); ret = 1; @@ -242,7 +212,37 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) ret = 0; spin_unlock(&cp->lock); - ct_write_unlock(hash); + ct_write_unlock_bh(hash); + + return ret; +} + +/* Try to unlink ip_vs_conn from ip_vs_conn_tab. + * returns bool success. + */ +static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) +{ + unsigned int hash; + bool ret; + + hash = ip_vs_conn_hashkey_conn(cp); + + ct_write_lock_bh(hash); + spin_lock(&cp->lock); + + if (cp->flags & IP_VS_CONN_F_HASHED) { + ret = false; + /* Decrease refcnt and unlink conn only if we are last user */ + if (atomic_cmpxchg(&cp->refcnt, 1, 0) == 1) { + hlist_del_rcu(&cp->c_list); + cp->flags &= ~IP_VS_CONN_F_HASHED; + ret = true; + } + } else + ret = atomic_read(&cp->refcnt) ? false : true; + + spin_unlock(&cp->lock); + ct_write_unlock_bh(hash); return ret; } @@ -262,24 +262,25 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) hash = ip_vs_conn_hashkey_param(p, false); - ct_read_lock(hash); + rcu_read_lock(); - hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (cp->af == p->af && - p->cport == cp->cport && p->vport == cp->vport && + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { + if (p->cport == cp->cport && p->vport == cp->vport && + cp->af == p->af && ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && p->protocol == cp->protocol && ip_vs_conn_net_eq(cp, p->net)) { + if (!__ip_vs_conn_get(cp)) + continue; /* HIT */ - atomic_inc(&cp->refcnt); - ct_read_unlock(hash); + rcu_read_unlock(); return cp; } } - ct_read_unlock(hash); + rcu_read_unlock(); return NULL; } @@ -346,14 +347,16 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) hash = ip_vs_conn_hashkey_param(p, false); - ct_read_lock(hash); + rcu_read_lock(); - hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (!ip_vs_conn_net_eq(cp, p->net)) - continue; - if (p->pe_data && p->pe->ct_match) { - if (p->pe == cp->pe && p->pe->ct_match(p, cp)) - goto out; + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { + if (unlikely(p->pe_data && p->pe->ct_match)) { + if (!ip_vs_conn_net_eq(cp, p->net)) + continue; + if (p->pe == cp->pe && p->pe->ct_match(p, cp)) { + if (__ip_vs_conn_get(cp)) + goto out; + } continue; } @@ -363,17 +366,18 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) * p->vaddr is a fwmark */ ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af, p->vaddr, &cp->vaddr) && - p->cport == cp->cport && p->vport == cp->vport && + p->vport == cp->vport && p->cport == cp->cport && cp->flags & IP_VS_CONN_F_TEMPLATE && - p->protocol == cp->protocol) - goto out; + p->protocol == cp->protocol && + ip_vs_conn_net_eq(cp, p->net)) { + if (__ip_vs_conn_get(cp)) + goto out; + } } cp = NULL; out: - if (cp) - atomic_inc(&cp->refcnt); - ct_read_unlock(hash); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n", ip_vs_proto_name(p->protocol), @@ -398,23 +402,24 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) */ hash = ip_vs_conn_hashkey_param(p, true); - ct_read_lock(hash); + rcu_read_lock(); - hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (cp->af == p->af && - p->vport == cp->cport && p->cport == cp->dport && + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { + if (p->vport == cp->cport && p->cport == cp->dport && + cp->af == p->af && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && p->protocol == cp->protocol && ip_vs_conn_net_eq(cp, p->net)) { + if (!__ip_vs_conn_get(cp)) + continue; /* HIT */ - atomic_inc(&cp->refcnt); ret = cp; break; } } - ct_read_unlock(hash); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n", ip_vs_proto_name(p->protocol), @@ -457,13 +462,13 @@ void ip_vs_conn_put(struct ip_vs_conn *cp) void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport) { if (ip_vs_conn_unhash(cp)) { - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); if (cp->flags & IP_VS_CONN_F_NO_CPORT) { atomic_dec(&ip_vs_conn_no_cport_cnt); cp->flags &= ~IP_VS_CONN_F_NO_CPORT; cp->cport = cport; } - spin_unlock(&cp->lock); + spin_unlock_bh(&cp->lock); /* hash on new dport */ ip_vs_conn_hash(cp); @@ -549,7 +554,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) return; /* Increase the refcnt counter of the dest */ - atomic_inc(&dest->refcnt); + ip_vs_dest_hold(dest); conn_flags = atomic_read(&dest->conn_flags); if (cp->protocol != IPPROTO_UDP) @@ -606,20 +611,22 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) * Check if there is a destination for the connection, if so * bind the connection to the destination. */ -struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) +void ip_vs_try_bind_dest(struct ip_vs_conn *cp) { struct ip_vs_dest *dest; + rcu_read_lock(); dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr, cp->dport, &cp->vaddr, cp->vport, cp->protocol, cp->fwmark, cp->flags); if (dest) { struct ip_vs_proto_data *pd; - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); if (cp->dest) { - spin_unlock(&cp->lock); - return dest; + spin_unlock_bh(&cp->lock); + rcu_read_unlock(); + return; } /* Applications work depending on the forwarding method @@ -628,7 +635,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) ip_vs_unbind_app(cp); ip_vs_bind_dest(cp, dest); - spin_unlock(&cp->lock); + spin_unlock_bh(&cp->lock); /* Update its packet transmitter */ cp->packet_xmit = NULL; @@ -643,7 +650,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) if (pd && atomic_read(&pd->appcnt)) ip_vs_bind_app(cp, pd->pp); } - return dest; + rcu_read_unlock(); } @@ -695,12 +702,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) dest->flags &= ~IP_VS_DEST_F_OVERLOAD; } - /* - * Simply decrease the refcnt of the dest, because the - * dest will be either in service's destination list - * or in the trash. - */ - atomic_dec(&dest->refcnt); + ip_vs_dest_put(dest); } static int expire_quiescent_template(struct netns_ipvs *ipvs, @@ -757,41 +759,36 @@ int ip_vs_check_template(struct ip_vs_conn *ct) * Simply decrease the refcnt of the template, * don't restart its timer. */ - atomic_dec(&ct->refcnt); + __ip_vs_conn_put(ct); return 0; } return 1; } +static void ip_vs_conn_rcu_free(struct rcu_head *head) +{ + struct ip_vs_conn *cp = container_of(head, struct ip_vs_conn, + rcu_head); + + ip_vs_pe_put(cp->pe); + kfree(cp->pe_data); + kmem_cache_free(ip_vs_conn_cachep, cp); +} + static void ip_vs_conn_expire(unsigned long data) { struct ip_vs_conn *cp = (struct ip_vs_conn *)data; struct net *net = ip_vs_conn_net(cp); struct netns_ipvs *ipvs = net_ipvs(net); - cp->timeout = 60*HZ; - - /* - * hey, I'm using it - */ - atomic_inc(&cp->refcnt); - /* * do I control anybody? */ if (atomic_read(&cp->n_control)) goto expire_later; - /* - * unhash it if it is hashed in the conn table - */ - if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) - goto expire_later; - - /* - * refcnt==1 implies I'm the only one referrer - */ - if (likely(atomic_read(&cp->refcnt) == 1)) { + /* Unlink conn if not referenced anymore */ + if (likely(ip_vs_conn_unlink(cp))) { /* delete the timer if it is activated by other users */ del_timer(&cp->timer); @@ -810,38 +807,41 @@ static void ip_vs_conn_expire(unsigned long data) ip_vs_conn_drop_conntrack(cp); } - ip_vs_pe_put(cp->pe); - kfree(cp->pe_data); if (unlikely(cp->app != NULL)) ip_vs_unbind_app(cp); ip_vs_unbind_dest(cp); if (cp->flags & IP_VS_CONN_F_NO_CPORT) atomic_dec(&ip_vs_conn_no_cport_cnt); + call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free); atomic_dec(&ipvs->conn_count); - - kmem_cache_free(ip_vs_conn_cachep, cp); return; } - /* hash it back to the table */ - ip_vs_conn_hash(cp); - expire_later: - IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n", - atomic_read(&cp->refcnt)-1, + IP_VS_DBG(7, "delayed: conn->refcnt=%d conn->n_control=%d\n", + atomic_read(&cp->refcnt), atomic_read(&cp->n_control)); + atomic_inc(&cp->refcnt); + cp->timeout = 60*HZ; + if (ipvs->sync_state & IP_VS_STATE_MASTER) ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs)); ip_vs_conn_put(cp); } - +/* Modify timer, so that it expires as soon as possible. + * Can be called without reference only if under RCU lock. + */ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) { - if (del_timer(&cp->timer)) - mod_timer(&cp->timer, jiffies); + /* Using mod_timer_pending will ensure the timer is not + * modified after the final del_timer in ip_vs_conn_expire. + */ + if (timer_pending(&cp->timer) && + time_after(cp->timer.expires, jiffies)) + mod_timer_pending(&cp->timer, jiffies); } @@ -858,7 +858,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net, p->protocol); - cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); + cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC); if (cp == NULL) { IP_VS_ERR_RL("%s(): no memory\n", __func__); return NULL; @@ -869,13 +869,13 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, ip_vs_conn_net_set(cp, p->net); cp->af = p->af; cp->protocol = p->protocol; - ip_vs_addr_copy(p->af, &cp->caddr, p->caddr); + ip_vs_addr_set(p->af, &cp->caddr, p->caddr); cp->cport = p->cport; - ip_vs_addr_copy(p->af, &cp->vaddr, p->vaddr); + ip_vs_addr_set(p->af, &cp->vaddr, p->vaddr); cp->vport = p->vport; /* proto should only be IPPROTO_IP if d_addr is a fwmark */ - ip_vs_addr_copy(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af, - &cp->daddr, daddr); + ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af, + &cp->daddr, daddr); cp->dport = dport; cp->flags = flags; cp->fwmark = fwmark; @@ -884,6 +884,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, cp->pe = p->pe; cp->pe_data = p->pe_data; cp->pe_data_len = p->pe_data_len; + } else { + cp->pe = NULL; + cp->pe_data = NULL; + cp->pe_data_len = 0; } spin_lock_init(&cp->lock); @@ -894,18 +898,28 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, */ atomic_set(&cp->refcnt, 1); + cp->control = NULL; atomic_set(&cp->n_control, 0); atomic_set(&cp->in_pkts, 0); + cp->packet_xmit = NULL; + cp->app = NULL; + cp->app_data = NULL; + /* reset struct ip_vs_seq */ + cp->in_seq.delta = 0; + cp->out_seq.delta = 0; + atomic_inc(&ipvs->conn_count); if (flags & IP_VS_CONN_F_NO_CPORT) atomic_inc(&ip_vs_conn_no_cport_cnt); /* Bind the connection with a destination server */ + cp->dest = NULL; ip_vs_bind_dest(cp, dest); /* Set its state and timeout */ cp->state = 0; + cp->old_state = 0; cp->timeout = 3*HZ; cp->sync_endtime = jiffies & ~3UL; @@ -952,14 +966,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) struct ip_vs_iter_state *iter = seq->private; for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { - ct_read_lock_bh(idx); - hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { + rcu_read_lock(); + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { + /* __ip_vs_conn_get() is not needed by + * ip_vs_conn_seq_show and ip_vs_conn_sync_seq_show + */ if (pos-- == 0) { iter->l = &ip_vs_conn_tab[idx]; return cp; } } - ct_read_unlock_bh(idx); + rcu_read_unlock(); } return NULL; @@ -977,6 +994,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_vs_conn *cp = v; struct ip_vs_iter_state *iter = seq->private; + struct hlist_node *e; struct hlist_head *l = iter->l; int idx; @@ -985,19 +1003,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) return ip_vs_conn_array(seq, 0); /* more on same hash chain? */ - if (cp->c_list.next) - return hlist_entry(cp->c_list.next, struct ip_vs_conn, c_list); + e = rcu_dereference(hlist_next_rcu(&cp->c_list)); + if (e) + return hlist_entry(e, struct ip_vs_conn, c_list); + rcu_read_unlock(); idx = l - ip_vs_conn_tab; - ct_read_unlock_bh(idx); - while (++idx < ip_vs_conn_tab_size) { - ct_read_lock_bh(idx); - hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { + rcu_read_lock(); + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { iter->l = &ip_vs_conn_tab[idx]; return cp; } - ct_read_unlock_bh(idx); + rcu_read_unlock(); } iter->l = NULL; return NULL; @@ -1009,7 +1027,7 @@ static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) struct hlist_head *l = iter->l; if (l) - ct_read_unlock_bh(l - ip_vs_conn_tab); + rcu_read_unlock(); } static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) @@ -1188,7 +1206,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp) void ip_vs_random_dropentry(struct net *net) { int idx; - struct ip_vs_conn *cp; + struct ip_vs_conn *cp, *cp_c; /* * Randomly scan 1/32 of the whole table every second @@ -1199,9 +1217,9 @@ void ip_vs_random_dropentry(struct net *net) /* * Lock is actually needed in this loop. */ - ct_write_lock_bh(hash); + rcu_read_lock(); - hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ continue; @@ -1228,12 +1246,15 @@ void ip_vs_random_dropentry(struct net *net) IP_VS_DBG(4, "del connection\n"); ip_vs_conn_expire_now(cp); - if (cp->control) { + cp_c = cp->control; + /* cp->control is valid only with reference to cp */ + if (cp_c && __ip_vs_conn_get(cp)) { IP_VS_DBG(4, "del conn template\n"); - ip_vs_conn_expire_now(cp->control); + ip_vs_conn_expire_now(cp_c); + __ip_vs_conn_put(cp); } } - ct_write_unlock_bh(hash); + rcu_read_unlock(); } } @@ -1244,7 +1265,7 @@ void ip_vs_random_dropentry(struct net *net) static void ip_vs_conn_flush(struct net *net) { int idx; - struct ip_vs_conn *cp; + struct ip_vs_conn *cp, *cp_c; struct netns_ipvs *ipvs = net_ipvs(net); flush_again: @@ -1252,19 +1273,22 @@ flush_again: /* * Lock is actually needed in this loop. */ - ct_write_lock_bh(idx); + rcu_read_lock(); - hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { if (!ip_vs_conn_net_eq(cp, net)) continue; IP_VS_DBG(4, "del connection\n"); ip_vs_conn_expire_now(cp); - if (cp->control) { + cp_c = cp->control; + /* cp->control is valid only with reference to cp */ + if (cp_c && __ip_vs_conn_get(cp)) { IP_VS_DBG(4, "del conn template\n"); - ip_vs_conn_expire_now(cp->control); + ip_vs_conn_expire_now(cp_c); + __ip_vs_conn_put(cp); } } - ct_write_unlock_bh(idx); + rcu_read_unlock(); } /* the counter may be not NULL, because maybe some conn entries @@ -1331,7 +1355,7 @@ int __init ip_vs_conn_init(void) INIT_HLIST_HEAD(&ip_vs_conn_tab[idx]); for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) { - rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); + spin_lock_init(&__ip_vs_conntbl_lock_array[idx].l); } /* calculate the random value for connection hash */ @@ -1342,6 +1366,8 @@ int __init ip_vs_conn_init(void) void ip_vs_conn_cleanup(void) { + /* Wait all ip_vs_conn_rcu_free() callbacks to complete */ + rcu_barrier(); /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); vfree(ip_vs_conn_tab); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 2aef23ed748b..f26fe3353a30 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -203,7 +203,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, { ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr, vport, p); - p->pe = svc->pe; + p->pe = rcu_dereference(svc->pe); if (p->pe && p->pe->fill_param) return p->pe->fill_param(p, skb); @@ -296,12 +296,15 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* Check if a template already exists */ ct = ip_vs_ct_in_get(¶m); if (!ct || !ip_vs_check_template(ct)) { + struct ip_vs_scheduler *sched; + /* * No template found or the dest of the connection * template is not available. * return *ignored=0 i.e. ICMP and NF_DROP */ - dest = svc->scheduler->schedule(svc, skb); + sched = rcu_dereference(svc->scheduler); + dest = sched->schedule(svc, skb); if (!dest) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); @@ -391,6 +394,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, { struct ip_vs_protocol *pp = pd->pp; struct ip_vs_conn *cp = NULL; + struct ip_vs_scheduler *sched; struct ip_vs_dest *dest; __be16 _ports[2], *pptr; unsigned int flags; @@ -446,7 +450,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, return NULL; } - dest = svc->scheduler->schedule(svc, skb); + sched = rcu_dereference(svc->scheduler); + dest = sched->schedule(svc, skb); if (dest == NULL) { IP_VS_DBG(1, "Schedule: no dest found.\n"); return NULL; @@ -504,7 +509,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); if (pptr == NULL) { - ip_vs_service_put(svc); return NF_DROP; } @@ -530,8 +534,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, IP_VS_CONN_F_ONE_PACKET : 0; union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; - ip_vs_service_put(svc); - /* create a new connection entry */ IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); { @@ -568,12 +570,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, * listed in the ipvs table), pass the packets, because it is * not ipvs job to decide to drop the packets. */ - if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) { - ip_vs_service_put(svc); + if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) return NF_ACCEPT; - } - - ip_vs_service_put(svc); /* * Notify the client that the destination is unreachable, and @@ -640,8 +638,11 @@ static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum) static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) { - int err = ip_defrag(skb, user); + int err; + local_bh_disable(); + err = ip_defrag(skb, user); + local_bh_enable(); if (!err) ip_send_check(ip_hdr(skb)); @@ -1161,9 +1162,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) sizeof(_ports), _ports, &iph); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ - if (ip_vs_lookup_real_service(net, af, iph.protocol, - &iph.saddr, - pptr[0])) { + if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr, + pptr[0])) { /* * Notify the real server: there is no * existing entry if it is not RST @@ -1220,13 +1220,7 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - unsigned int verdict; - - /* Disable BH in LOCAL_OUT until all places are fixed */ - local_bh_disable(); - verdict = ip_vs_out(hooknum, skb, AF_INET); - local_bh_enable(); - return verdict; + return ip_vs_out(hooknum, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1253,13 +1247,7 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - unsigned int verdict; - - /* Disable BH in LOCAL_OUT until all places are fixed */ - local_bh_disable(); - verdict = ip_vs_out(hooknum, skb, AF_INET6); - local_bh_enable(); - return verdict; + return ip_vs_out(hooknum, skb, AF_INET6); } #endif @@ -1395,10 +1383,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) goto ignore_ipip; /* Prefer the resulting PMTU */ if (dest) { - spin_lock(&dest->dst_lock); - if (dest->dst_cache) - mtu = dst_mtu(dest->dst_cache); - spin_unlock(&dest->dst_lock); + struct ip_vs_dest_dst *dest_dst; + + rcu_read_lock(); + dest_dst = rcu_dereference(dest->dest_dst); + if (dest_dst) + mtu = dst_mtu(dest_dst->dst_cache); + rcu_read_unlock(); } if (mtu > 68 + sizeof(struct iphdr)) mtu -= sizeof(struct iphdr); @@ -1714,13 +1705,7 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - unsigned int verdict; - - /* Disable BH in LOCAL_OUT until all places are fixed */ - local_bh_disable(); - verdict = ip_vs_in(hooknum, skb, AF_INET); - local_bh_enable(); - return verdict; + return ip_vs_in(hooknum, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1779,13 +1764,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - unsigned int verdict; - - /* Disable BH in LOCAL_OUT until all places are fixed */ - local_bh_disable(); - verdict = ip_vs_in(hooknum, skb, AF_INET6); - local_bh_enable(); - return verdict; + return ip_vs_in(hooknum, skb, AF_INET6); } #endif diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 8104120f16a5..9e4074c26dc2 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -55,9 +55,6 @@ /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ static DEFINE_MUTEX(__ip_vs_mutex); -/* lock for service table */ -static DEFINE_RWLOCK(__ip_vs_svc_lock); - /* sysctl variables */ #ifdef CONFIG_IP_VS_DEBUG @@ -71,7 +68,7 @@ int ip_vs_get_debug_level(void) /* Protos */ -static void __ip_vs_del_service(struct ip_vs_service *svc); +static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup); #ifdef CONFIG_IP_VS_IPV6 @@ -257,9 +254,9 @@ ip_vs_use_count_dec(void) #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) /* the service table hashed by <protocol, addr, port> */ -static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; +static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; /* the service table hashed by fwmark */ -static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; +static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; /* @@ -314,13 +311,13 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) */ hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol, &svc->addr, svc->port); - list_add(&svc->s_list, &ip_vs_svc_table[hash]); + hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]); } else { /* * Hash it by fwmark in svc_fwm_table */ hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); - list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); + hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]); } svc->flags |= IP_VS_SVC_F_HASHED; @@ -344,10 +341,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) if (svc->fwmark == 0) { /* Remove it from the svc_table table */ - list_del(&svc->s_list); + hlist_del_rcu(&svc->s_list); } else { /* Remove it from the svc_fwm_table table */ - list_del(&svc->f_list); + hlist_del_rcu(&svc->f_list); } svc->flags &= ~IP_VS_SVC_F_HASHED; @@ -369,7 +366,7 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol, /* Check for "full" addressed entries */ hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); - list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ + hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) { if ((svc->af == af) && ip_vs_addr_equal(af, &svc->addr, vaddr) && (svc->port == vport) @@ -396,7 +393,7 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) /* Check for fwmark addressed entries */ hash = ip_vs_svc_fwm_hashkey(net, fwmark); - list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { + hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) { if (svc->fwmark == fwmark && svc->af == af && net_eq(svc->net, net)) { /* HIT */ @@ -407,15 +404,14 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) return NULL; } +/* Find service, called under RCU lock */ struct ip_vs_service * -ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, - const union nf_inet_addr *vaddr, __be16 vport) +ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, + const union nf_inet_addr *vaddr, __be16 vport) { struct ip_vs_service *svc; struct netns_ipvs *ipvs = net_ipvs(net); - read_lock(&__ip_vs_svc_lock); - /* * Check the table hashed by fwmark first */ @@ -451,10 +447,6 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, } out: - if (svc) - atomic_inc(&svc->usecnt); - read_unlock(&__ip_vs_svc_lock); - IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", fwmark, ip_vs_proto_name(protocol), IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), @@ -471,6 +463,13 @@ __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) dest->svc = svc; } +static void ip_vs_service_free(struct ip_vs_service *svc) +{ + if (svc->stats.cpustats) + free_percpu(svc->stats.cpustats); + kfree(svc); +} + static void __ip_vs_unbind_svc(struct ip_vs_dest *dest) { @@ -478,12 +477,11 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest) dest->svc = NULL; if (atomic_dec_and_test(&svc->refcnt)) { - IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n", + IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", svc->fwmark, IP_VS_DBG_ADDR(svc->af, &svc->addr), - ntohs(svc->port), atomic_read(&svc->usecnt)); - free_percpu(svc->stats.cpustats); - kfree(svc); + ntohs(svc->port)); + ip_vs_service_free(svc); } } @@ -508,17 +506,13 @@ static inline unsigned int ip_vs_rs_hashkey(int af, & IP_VS_RTAB_MASK; } -/* - * Hashes ip_vs_dest in rs_table by <proto,addr,port>. - * should be called with locked tables. - */ -static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) +/* Hash ip_vs_dest in rs_table by <proto,addr,port>. */ +static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) { unsigned int hash; - if (!list_empty(&dest->d_list)) { - return 0; - } + if (dest->in_rs_table) + return; /* * Hash by proto,addr,port, @@ -526,64 +520,51 @@ static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) */ hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); - list_add(&dest->d_list, &ipvs->rs_table[hash]); - - return 1; + hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]); + dest->in_rs_table = 1; } -/* - * UNhashes ip_vs_dest from rs_table. - * should be called with locked tables. - */ -static int ip_vs_rs_unhash(struct ip_vs_dest *dest) +/* Unhash ip_vs_dest from rs_table. */ +static void ip_vs_rs_unhash(struct ip_vs_dest *dest) { /* * Remove it from the rs_table table. */ - if (!list_empty(&dest->d_list)) { - list_del_init(&dest->d_list); + if (dest->in_rs_table) { + hlist_del_rcu(&dest->d_list); + dest->in_rs_table = 0; } - - return 1; } -/* - * Lookup real service by <proto,addr,port> in the real service table. - */ -struct ip_vs_dest * -ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, - const union nf_inet_addr *daddr, - __be16 dport) +/* Check if real service by <proto,addr,port> is present */ +bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol, + const union nf_inet_addr *daddr, __be16 dport) { struct netns_ipvs *ipvs = net_ipvs(net); unsigned int hash; struct ip_vs_dest *dest; - /* - * Check for "full" addressed entries - * Return the first found entry - */ + /* Check for "full" addressed entries */ hash = ip_vs_rs_hashkey(af, daddr, dport); - read_lock(&ipvs->rs_lock); - list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) { - if ((dest->af == af) - && ip_vs_addr_equal(af, &dest->addr, daddr) - && (dest->port == dport) - && ((dest->protocol == protocol) || - dest->vfwmark)) { + rcu_read_lock(); + hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { + if (dest->port == dport && + dest->af == af && + ip_vs_addr_equal(af, &dest->addr, daddr) && + (dest->protocol == protocol || dest->vfwmark)) { /* HIT */ - read_unlock(&ipvs->rs_lock); - return dest; + rcu_read_unlock(); + return true; } } - read_unlock(&ipvs->rs_lock); + rcu_read_unlock(); - return NULL; + return false; } -/* - * Lookup destination by {addr,port} in the given service +/* Lookup destination by {addr,port} in the given service + * Called under RCU lock. */ static struct ip_vs_dest * ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, @@ -594,7 +575,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, /* * Find the destination for the given service */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if ((dest->af == svc->af) && ip_vs_addr_equal(svc->af, &dest->addr, daddr) && (dest->port == dport)) { @@ -608,13 +589,11 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, /* * Find destination by {daddr,dport,vaddr,protocol} - * Cretaed to be used in ip_vs_process_message() in + * Created to be used in ip_vs_process_message() in * the backup synchronization daemon. It finds the * destination to be bound to the received connection * on the backup. - * - * ip_vs_lookup_real_service() looked promissing, but - * seems not working as expected. + * Called under RCU lock, no refcnt is returned. */ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr, @@ -627,7 +606,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, struct ip_vs_service *svc; __be16 port = dport; - svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport); + svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport); if (!svc) return NULL; if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) @@ -635,12 +614,31 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, dest = ip_vs_lookup_dest(svc, daddr, port); if (!dest) dest = ip_vs_lookup_dest(svc, daddr, port ^ dport); - if (dest) - atomic_inc(&dest->refcnt); - ip_vs_service_put(svc); return dest; } +void ip_vs_dest_dst_rcu_free(struct rcu_head *head) +{ + struct ip_vs_dest_dst *dest_dst = container_of(head, + struct ip_vs_dest_dst, + rcu_head); + + dst_release(dest_dst->dst_cache); + kfree(dest_dst); +} + +/* Release dest_dst and dst_cache for dest in user context */ +static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest) +{ + struct ip_vs_dest_dst *old; + + old = rcu_dereference_protected(dest->dest_dst, 1); + if (old) { + RCU_INIT_POINTER(dest->dest_dst, NULL); + call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); + } +} + /* * Lookup dest by {svc,addr,port} in the destination trash. * The destination trash is used to hold the destinations that are removed @@ -655,19 +653,25 @@ static struct ip_vs_dest * ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, __be16 dport) { - struct ip_vs_dest *dest, *nxt; + struct ip_vs_dest *dest; struct netns_ipvs *ipvs = net_ipvs(svc->net); /* * Find the destination in trash */ - list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { + spin_lock_bh(&ipvs->dest_trash_lock); + list_for_each_entry(dest, &ipvs->dest_trash, t_list) { IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " "dest->refcnt=%d\n", dest->vfwmark, IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); + /* We can not reuse dest while in grace period + * because conns still can use dest->svc + */ + if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) + continue; if (dest->af == svc->af && ip_vs_addr_equal(svc->af, &dest->addr, daddr) && dest->port == dport && @@ -677,29 +681,27 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) && dest->vport == svc->port))) { /* HIT */ - return dest; - } - - /* - * Try to purge the destination from trash if not referenced - */ - if (atomic_read(&dest->refcnt) == 1) { - IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u " - "from trash\n", - dest->vfwmark, - IP_VS_DBG_ADDR(svc->af, &dest->addr), - ntohs(dest->port)); - list_del(&dest->n_list); - ip_vs_dst_reset(dest); - __ip_vs_unbind_svc(dest); - free_percpu(dest->stats.cpustats); - kfree(dest); + list_del(&dest->t_list); + ip_vs_dest_hold(dest); + goto out; } } - return NULL; + dest = NULL; + +out: + spin_unlock_bh(&ipvs->dest_trash_lock); + + return dest; } +static void ip_vs_dest_free(struct ip_vs_dest *dest) +{ + __ip_vs_dst_cache_reset(dest); + __ip_vs_unbind_svc(dest); + free_percpu(dest->stats.cpustats); + kfree(dest); +} /* * Clean up all the destinations in the trash @@ -708,19 +710,18 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, * When the ip_vs_control_clearup is activated by ipvs module exit, * the service tables must have been flushed and all the connections * are expired, and the refcnt of each destination in the trash must - * be 1, so we simply release them here. + * be 0, so we simply release them here. */ static void ip_vs_trash_cleanup(struct net *net) { struct ip_vs_dest *dest, *nxt; struct netns_ipvs *ipvs = net_ipvs(net); - list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { - list_del(&dest->n_list); - ip_vs_dst_reset(dest); - __ip_vs_unbind_svc(dest); - free_percpu(dest->stats.cpustats); - kfree(dest); + del_timer_sync(&ipvs->dest_trash_timer); + /* No need to use dest_trash_lock */ + list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) { + list_del(&dest->t_list); + ip_vs_dest_free(dest); } } @@ -770,6 +771,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest, int add) { struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct ip_vs_scheduler *sched; int conn_flags; /* set the weight and the flags */ @@ -785,9 +787,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, * Put the real service in rs_table if not present. * For now only for NAT! */ - write_lock_bh(&ipvs->rs_lock); ip_vs_rs_hash(ipvs, dest); - write_unlock_bh(&ipvs->rs_lock); } atomic_set(&dest->conn_flags, conn_flags); @@ -811,27 +811,20 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, dest->l_threshold = udest->l_threshold; spin_lock_bh(&dest->dst_lock); - ip_vs_dst_reset(dest); + __ip_vs_dst_cache_reset(dest); spin_unlock_bh(&dest->dst_lock); - if (add) - ip_vs_start_estimator(svc->net, &dest->stats); - - write_lock_bh(&__ip_vs_svc_lock); - - /* Wait until all other svc users go away */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - + sched = rcu_dereference_protected(svc->scheduler, 1); if (add) { - list_add(&dest->n_list, &svc->destinations); + ip_vs_start_estimator(svc->net, &dest->stats); + list_add_rcu(&dest->n_list, &svc->destinations); svc->num_dests++; + if (sched->add_dest) + sched->add_dest(svc, dest); + } else { + if (sched->upd_dest) + sched->upd_dest(svc, dest); } - - /* call the update_service, because server weight may be changed */ - if (svc->scheduler->update_service) - svc->scheduler->update_service(svc); - - write_unlock_bh(&__ip_vs_svc_lock); } @@ -883,7 +876,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, atomic_set(&dest->persistconns, 0); atomic_set(&dest->refcnt, 1); - INIT_LIST_HEAD(&dest->d_list); + INIT_HLIST_NODE(&dest->d_list); spin_lock_init(&dest->dst_lock); spin_lock_init(&dest->stats.lock); __ip_vs_update_dest(svc, dest, udest, 1); @@ -925,10 +918,10 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) ip_vs_addr_copy(svc->af, &daddr, &udest->addr); - /* - * Check if the dest already exists in the list - */ + /* We use function that requires RCU lock */ + rcu_read_lock(); dest = ip_vs_lookup_dest(svc, &daddr, dport); + rcu_read_unlock(); if (dest != NULL) { IP_VS_DBG(1, "%s(): dest already exists\n", __func__); @@ -950,11 +943,6 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) IP_VS_DBG_ADDR(svc->af, &dest->vaddr), ntohs(dest->vport)); - /* - * Get the destination from the trash - */ - list_del(&dest->n_list); - __ip_vs_update_dest(svc, dest, udest, 1); ret = 0; } else { @@ -994,10 +982,10 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) ip_vs_addr_copy(svc->af, &daddr, &udest->addr); - /* - * Lookup the destination list - */ + /* We use function that requires RCU lock */ + rcu_read_lock(); dest = ip_vs_lookup_dest(svc, &daddr, dport); + rcu_read_unlock(); if (dest == NULL) { IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__); @@ -1010,11 +998,21 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return 0; } +static void ip_vs_dest_wait_readers(struct rcu_head *head) +{ + struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest, + rcu_head); + + /* End of grace period after unlinking */ + clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); +} + /* * Delete a destination (must be already unlinked from the service) */ -static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) +static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest, + bool cleanup) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -1023,38 +1021,24 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) /* * Remove it from the d-linked list with the real services. */ - write_lock_bh(&ipvs->rs_lock); ip_vs_rs_unhash(dest); - write_unlock_bh(&ipvs->rs_lock); - /* - * Decrease the refcnt of the dest, and free the dest - * if nobody refers to it (refcnt=0). Otherwise, throw - * the destination into the trash. - */ - if (atomic_dec_and_test(&dest->refcnt)) { - IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n", - dest->vfwmark, - IP_VS_DBG_ADDR(dest->af, &dest->addr), - ntohs(dest->port)); - ip_vs_dst_reset(dest); - /* simply decrease svc->refcnt here, let the caller check - and release the service if nobody refers to it. - Only user context can release destination and service, - and only one user context can update virtual service at a - time, so the operation here is OK */ - atomic_dec(&dest->svc->refcnt); - free_percpu(dest->stats.cpustats); - kfree(dest); - } else { - IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " - "dest->refcnt=%d\n", - IP_VS_DBG_ADDR(dest->af, &dest->addr), - ntohs(dest->port), - atomic_read(&dest->refcnt)); - list_add(&dest->n_list, &ipvs->dest_trash); - atomic_inc(&dest->refcnt); + if (!cleanup) { + set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); + call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers); } + + spin_lock_bh(&ipvs->dest_trash_lock); + IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), + atomic_read(&dest->refcnt)); + if (list_empty(&ipvs->dest_trash) && !cleanup) + mod_timer(&ipvs->dest_trash_timer, + jiffies + IP_VS_DEST_TRASH_PERIOD); + /* dest lives in trash without reference */ + list_add(&dest->t_list, &ipvs->dest_trash); + spin_unlock_bh(&ipvs->dest_trash_lock); + ip_vs_dest_put(dest); } @@ -1070,14 +1054,16 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, /* * Remove it from the d-linked destination list. */ - list_del(&dest->n_list); + list_del_rcu(&dest->n_list); svc->num_dests--; - /* - * Call the update_service function of its scheduler - */ - if (svcupd && svc->scheduler->update_service) - svc->scheduler->update_service(svc); + if (svcupd) { + struct ip_vs_scheduler *sched; + + sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched->del_dest) + sched->del_dest(svc, dest); + } } @@ -1092,37 +1078,56 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) EnterFunction(2); + /* We use function that requires RCU lock */ + rcu_read_lock(); dest = ip_vs_lookup_dest(svc, &udest->addr, dport); + rcu_read_unlock(); if (dest == NULL) { IP_VS_DBG(1, "%s(): destination not found!\n", __func__); return -ENOENT; } - write_lock_bh(&__ip_vs_svc_lock); - - /* - * Wait until all other svc users go away. - */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - /* * Unlink dest from the service */ __ip_vs_unlink_dest(svc, dest, 1); - write_unlock_bh(&__ip_vs_svc_lock); - /* * Delete the destination */ - __ip_vs_del_dest(svc->net, dest); + __ip_vs_del_dest(svc->net, dest, false); LeaveFunction(2); return 0; } +static void ip_vs_dest_trash_expire(unsigned long data) +{ + struct net *net = (struct net *) data; + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_dest *dest, *next; + + spin_lock(&ipvs->dest_trash_lock); + list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { + /* Skip if dest is in grace period */ + if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) + continue; + if (atomic_read(&dest->refcnt) > 0) + continue; + IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", + dest->vfwmark, + IP_VS_DBG_ADDR(dest->svc->af, &dest->addr), + ntohs(dest->port)); + list_del(&dest->t_list); + ip_vs_dest_free(dest); + } + if (!list_empty(&ipvs->dest_trash)) + mod_timer(&ipvs->dest_trash_timer, + jiffies + IP_VS_DEST_TRASH_PERIOD); + spin_unlock(&ipvs->dest_trash_lock); +} /* * Add a service into the service hash table @@ -1178,7 +1183,6 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, } /* I'm the first user of the service */ - atomic_set(&svc->usecnt, 0); atomic_set(&svc->refcnt, 0); svc->af = u->af; @@ -1192,7 +1196,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, svc->net = net; INIT_LIST_HEAD(&svc->destinations); - rwlock_init(&svc->sched_lock); + spin_lock_init(&svc->sched_lock); spin_lock_init(&svc->stats.lock); /* Bind the scheduler */ @@ -1202,7 +1206,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, sched = NULL; /* Bind the ct retriever */ - ip_vs_bind_pe(svc, pe); + RCU_INIT_POINTER(svc->pe, pe); pe = NULL; /* Update the virtual service counters */ @@ -1218,9 +1222,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, ipvs->num_services++; /* Hash the service into the service table */ - write_lock_bh(&__ip_vs_svc_lock); ip_vs_svc_hash(svc); - write_unlock_bh(&__ip_vs_svc_lock); *svc_p = svc; /* Now there is a service - full throttle */ @@ -1230,15 +1232,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, out_err: if (svc != NULL) { - ip_vs_unbind_scheduler(svc); - if (svc->inc) { - local_bh_disable(); - ip_vs_app_inc_put(svc->inc); - local_bh_enable(); - } - if (svc->stats.cpustats) - free_percpu(svc->stats.cpustats); - kfree(svc); + ip_vs_unbind_scheduler(svc, sched); + ip_vs_service_free(svc); } ip_vs_scheduler_put(sched); ip_vs_pe_put(pe); @@ -1288,12 +1283,17 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) } #endif - write_lock_bh(&__ip_vs_svc_lock); - - /* - * Wait until all other svc users go away. - */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); + old_sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched != old_sched) { + /* Bind the new scheduler */ + ret = ip_vs_bind_scheduler(svc, sched); + if (ret) { + old_sched = sched; + goto out; + } + /* Unbind the old scheduler on success */ + ip_vs_unbind_scheduler(svc, old_sched); + } /* * Set the flags and timeout value @@ -1302,57 +1302,30 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) svc->timeout = u->timeout * HZ; svc->netmask = u->netmask; - old_sched = svc->scheduler; - if (sched != old_sched) { - /* - * Unbind the old scheduler - */ - if ((ret = ip_vs_unbind_scheduler(svc))) { - old_sched = sched; - goto out_unlock; - } - - /* - * Bind the new scheduler - */ - if ((ret = ip_vs_bind_scheduler(svc, sched))) { - /* - * If ip_vs_bind_scheduler fails, restore the old - * scheduler. - * The main reason of failure is out of memory. - * - * The question is if the old scheduler can be - * restored all the time. TODO: if it cannot be - * restored some time, we must delete the service, - * otherwise the system may crash. - */ - ip_vs_bind_scheduler(svc, old_sched); - old_sched = sched; - goto out_unlock; - } - } + old_pe = rcu_dereference_protected(svc->pe, 1); + if (pe != old_pe) + rcu_assign_pointer(svc->pe, pe); - old_pe = svc->pe; - if (pe != old_pe) { - ip_vs_unbind_pe(svc); - ip_vs_bind_pe(svc, pe); - } - -out_unlock: - write_unlock_bh(&__ip_vs_svc_lock); out: ip_vs_scheduler_put(old_sched); ip_vs_pe_put(old_pe); return ret; } +static void ip_vs_service_rcu_free(struct rcu_head *head) +{ + struct ip_vs_service *svc; + + svc = container_of(head, struct ip_vs_service, rcu_head); + ip_vs_service_free(svc); +} /* * Delete a service from the service list * - The service must be unlinked, unlocked and not referenced! * - We are called under _bh lock */ -static void __ip_vs_del_service(struct ip_vs_service *svc) +static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) { struct ip_vs_dest *dest, *nxt; struct ip_vs_scheduler *old_sched; @@ -1368,27 +1341,20 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) ip_vs_stop_estimator(svc->net, &svc->stats); /* Unbind scheduler */ - old_sched = svc->scheduler; - ip_vs_unbind_scheduler(svc); + old_sched = rcu_dereference_protected(svc->scheduler, 1); + ip_vs_unbind_scheduler(svc, old_sched); ip_vs_scheduler_put(old_sched); - /* Unbind persistence engine */ - old_pe = svc->pe; - ip_vs_unbind_pe(svc); + /* Unbind persistence engine, keep svc->pe */ + old_pe = rcu_dereference_protected(svc->pe, 1); ip_vs_pe_put(old_pe); - /* Unbind app inc */ - if (svc->inc) { - ip_vs_app_inc_put(svc->inc); - svc->inc = NULL; - } - /* * Unlink the whole destination list */ list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { __ip_vs_unlink_dest(svc, dest, 0); - __ip_vs_del_dest(svc->net, dest); + __ip_vs_del_dest(svc->net, dest, cleanup); } /* @@ -1402,13 +1368,12 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) /* * Free the service if nobody refers to it */ - if (atomic_read(&svc->refcnt) == 0) { - IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n", + if (atomic_dec_and_test(&svc->refcnt)) { + IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", svc->fwmark, IP_VS_DBG_ADDR(svc->af, &svc->addr), - ntohs(svc->port), atomic_read(&svc->usecnt)); - free_percpu(svc->stats.cpustats); - kfree(svc); + ntohs(svc->port)); + call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); } /* decrease the module use count */ @@ -1418,23 +1383,16 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) /* * Unlink a service from list and try to delete it if its refcnt reached 0 */ -static void ip_vs_unlink_service(struct ip_vs_service *svc) +static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup) { + /* Hold svc to avoid double release from dest_trash */ + atomic_inc(&svc->refcnt); /* * Unhash it from the service table */ - write_lock_bh(&__ip_vs_svc_lock); - ip_vs_svc_unhash(svc); - /* - * Wait until all the svc users go away. - */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - - __ip_vs_del_service(svc); - - write_unlock_bh(&__ip_vs_svc_lock); + __ip_vs_del_service(svc, cleanup); } /* @@ -1444,7 +1402,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc) { if (svc == NULL) return -EEXIST; - ip_vs_unlink_service(svc); + ip_vs_unlink_service(svc, false); return 0; } @@ -1453,19 +1411,20 @@ static int ip_vs_del_service(struct ip_vs_service *svc) /* * Flush all the virtual services */ -static int ip_vs_flush(struct net *net) +static int ip_vs_flush(struct net *net, bool cleanup) { int idx; - struct ip_vs_service *svc, *nxt; + struct ip_vs_service *svc; + struct hlist_node *n; /* * Flush the service table hashed by <netns,protocol,addr,port> */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], - s_list) { + hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx], + s_list) { if (net_eq(svc->net, net)) - ip_vs_unlink_service(svc); + ip_vs_unlink_service(svc, cleanup); } } @@ -1473,10 +1432,10 @@ static int ip_vs_flush(struct net *net) * Flush the service table hashed by fwmark */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry_safe(svc, nxt, - &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx], + f_list) { if (net_eq(svc->net, net)) - ip_vs_unlink_service(svc); + ip_vs_unlink_service(svc, cleanup); } } @@ -1492,32 +1451,29 @@ void ip_vs_service_net_cleanup(struct net *net) EnterFunction(2); /* Check for "full" addressed entries */ mutex_lock(&__ip_vs_mutex); - ip_vs_flush(net); + ip_vs_flush(net, true); mutex_unlock(&__ip_vs_mutex); LeaveFunction(2); } -/* - * Release dst hold by dst_cache - */ + +/* Put all references for device (dst_cache) */ static inline void -__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev) +ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev) { spin_lock_bh(&dest->dst_lock); - if (dest->dst_cache && dest->dst_cache->dev == dev) { + if (dest->dest_dst && dest->dest_dst->dst_cache->dev == dev) { IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n", dev->name, IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); - ip_vs_dst_reset(dest); + __ip_vs_dst_cache_reset(dest); } spin_unlock_bh(&dest->dst_lock); } -/* - * Netdev event receiver - * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to - * a device that is "unregister" it must be released. +/* Netdev event receiver + * Currently only NETDEV_DOWN is handled to release refs to cached dsts */ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, void *ptr) @@ -1529,35 +1485,37 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, struct ip_vs_dest *dest; unsigned int idx; - if (event != NETDEV_UNREGISTER || !ipvs) + if (event != NETDEV_DOWN || !ipvs) return NOTIFY_DONE; IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); EnterFunction(2); mutex_lock(&__ip_vs_mutex); for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { if (net_eq(svc->net, net)) { list_for_each_entry(dest, &svc->destinations, n_list) { - __ip_vs_dev_reset(dest, dev); + ip_vs_forget_dev(dest, dev); } } } - list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { if (net_eq(svc->net, net)) { list_for_each_entry(dest, &svc->destinations, n_list) { - __ip_vs_dev_reset(dest, dev); + ip_vs_forget_dev(dest, dev); } } } } - list_for_each_entry(dest, &ipvs->dest_trash, n_list) { - __ip_vs_dev_reset(dest, dev); + spin_lock_bh(&ipvs->dest_trash_lock); + list_for_each_entry(dest, &ipvs->dest_trash, t_list) { + ip_vs_forget_dev(dest, dev); } + spin_unlock_bh(&ipvs->dest_trash_lock); mutex_unlock(&__ip_vs_mutex); LeaveFunction(2); return NOTIFY_DONE; @@ -1570,12 +1528,10 @@ static int ip_vs_zero_service(struct ip_vs_service *svc) { struct ip_vs_dest *dest; - write_lock_bh(&__ip_vs_svc_lock); list_for_each_entry(dest, &svc->destinations, n_list) { ip_vs_zero_stats(&dest->stats); } ip_vs_zero_stats(&svc->stats); - write_unlock_bh(&__ip_vs_svc_lock); return 0; } @@ -1585,14 +1541,14 @@ static int ip_vs_zero_all(struct net *net) struct ip_vs_service *svc; for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { if (net_eq(svc->net, net)) ip_vs_zero_service(svc); } } for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { if (net_eq(svc->net, net)) ip_vs_zero_service(svc); } @@ -1920,7 +1876,7 @@ static struct ctl_table vs_vars[] = { struct ip_vs_iter { struct seq_net_private p; /* Do not move this, netns depends upon it*/ - struct list_head *table; + struct hlist_head *table; int bucket; }; @@ -1953,7 +1909,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* look in hash by protocol */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) { if (net_eq(svc->net, net) && pos-- == 0) { iter->table = ip_vs_svc_table; iter->bucket = idx; @@ -1964,7 +1920,8 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* keep looking in fwmark */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx], + f_list) { if (net_eq(svc->net, net) && pos-- == 0) { iter->table = ip_vs_svc_fwm_table; iter->bucket = idx; @@ -1977,17 +1934,16 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) } static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) -__acquires(__ip_vs_svc_lock) { - read_lock_bh(&__ip_vs_svc_lock); + rcu_read_lock(); return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN; } static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct list_head *e; + struct hlist_node *e; struct ip_vs_iter *iter; struct ip_vs_service *svc; @@ -2000,13 +1956,14 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (iter->table == ip_vs_svc_table) { /* next service in table hashed by protocol */ - if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket]) - return list_entry(e, struct ip_vs_service, s_list); - + e = rcu_dereference(hlist_next_rcu(&svc->s_list)); + if (e) + return hlist_entry(e, struct ip_vs_service, s_list); while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { - list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket], - s_list) { + hlist_for_each_entry_rcu(svc, + &ip_vs_svc_table[iter->bucket], + s_list) { return svc; } } @@ -2017,13 +1974,15 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) } /* next service in hashed by fwmark */ - if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket]) - return list_entry(e, struct ip_vs_service, f_list); + e = rcu_dereference(hlist_next_rcu(&svc->f_list)); + if (e) + return hlist_entry(e, struct ip_vs_service, f_list); scan_fwmark: while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket], - f_list) + hlist_for_each_entry_rcu(svc, + &ip_vs_svc_fwm_table[iter->bucket], + f_list) return svc; } @@ -2031,9 +1990,8 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) -__releases(__ip_vs_svc_lock) { - read_unlock_bh(&__ip_vs_svc_lock); + rcu_read_unlock(); } @@ -2051,6 +2009,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) const struct ip_vs_service *svc = v; const struct ip_vs_iter *iter = seq->private; const struct ip_vs_dest *dest; + struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); if (iter->table == ip_vs_svc_table) { #ifdef CONFIG_IP_VS_IPV6 @@ -2059,18 +2018,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), - svc->scheduler->name); + sched->name); else #endif seq_printf(seq, "%s %08X:%04X %s %s ", ip_vs_proto_name(svc->protocol), ntohl(svc->addr.ip), ntohs(svc->port), - svc->scheduler->name, + sched->name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } else { seq_printf(seq, "FWM %08X %s %s", - svc->fwmark, svc->scheduler->name, + svc->fwmark, sched->name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } @@ -2081,7 +2040,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) else seq_putc(seq, '\n'); - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { #ifdef CONFIG_IP_VS_IPV6 if (dest->af == AF_INET6) seq_printf(seq, @@ -2391,7 +2350,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (cmd == IP_VS_SO_SET_FLUSH) { /* Flush the virtual service */ - ret = ip_vs_flush(net); + ret = ip_vs_flush(net, false); goto out_unlock; } else if (cmd == IP_VS_SO_SET_TIMEOUT) { /* Set timeout values for (tcp tcpfin udp) */ @@ -2426,11 +2385,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) } /* Lookup the exact service by <protocol, addr, port> or fwmark */ + rcu_read_lock(); if (usvc.fwmark == 0) svc = __ip_vs_service_find(net, usvc.af, usvc.protocol, &usvc.addr, usvc.port); else svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); + rcu_read_unlock(); if (cmd != IP_VS_SO_SET_ADD && (svc == NULL || svc->protocol != usvc.protocol)) { @@ -2482,11 +2443,14 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) static void ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { + struct ip_vs_scheduler *sched; + + sched = rcu_dereference_protected(src->scheduler, 1); dst->protocol = src->protocol; dst->addr = src->addr.ip; dst->port = src->port; dst->fwmark = src->fwmark; - strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name)); + strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name)); dst->flags = src->flags; dst->timeout = src->timeout / HZ; dst->netmask = src->netmask; @@ -2505,7 +2469,7 @@ __ip_vs_get_service_entries(struct net *net, int ret = 0; for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { /* Only expose IPv4 entries to old interface */ if (svc->af != AF_INET || !net_eq(svc->net, net)) continue; @@ -2524,7 +2488,7 @@ __ip_vs_get_service_entries(struct net *net, } for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { /* Only expose IPv4 entries to old interface */ if (svc->af != AF_INET || !net_eq(svc->net, net)) continue; @@ -2553,11 +2517,13 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, union nf_inet_addr addr = { .ip = get->addr }; int ret = 0; + rcu_read_lock(); if (get->fwmark) svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark); else svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr, get->port); + rcu_read_unlock(); if (svc) { int count = 0; @@ -2740,12 +2706,14 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) entry = (struct ip_vs_service_entry *)arg; addr.ip = entry->addr; + rcu_read_lock(); if (entry->fwmark) svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark); else svc = __ip_vs_service_find(net, AF_INET, entry->protocol, &addr, entry->port); + rcu_read_unlock(); if (svc) { ip_vs_copy_service(entry, svc); if (copy_to_user(user, entry, sizeof(*entry)) != 0) @@ -2902,6 +2870,7 @@ nla_put_failure: static int ip_vs_genl_fill_service(struct sk_buff *skb, struct ip_vs_service *svc) { + struct ip_vs_scheduler *sched; struct nlattr *nl_service; struct ip_vs_flags flags = { .flags = svc->flags, .mask = ~0 }; @@ -2922,7 +2891,8 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, goto nla_put_failure; } - if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) || + sched = rcu_dereference_protected(svc->scheduler, 1); + if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) || (svc->pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) || nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || @@ -2973,7 +2943,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, mutex_lock(&__ip_vs_mutex); for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { - list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { + hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { if (++idx <= start || !net_eq(svc->net, net)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { @@ -2984,7 +2954,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, } for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { + hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { if (++idx <= start || !net_eq(svc->net, net)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { @@ -3044,11 +3014,13 @@ static int ip_vs_genl_parse_service(struct net *net, usvc->fwmark = 0; } + rcu_read_lock(); if (usvc->fwmark) svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark); else svc = __ip_vs_service_find(net, usvc->af, usvc->protocol, &usvc->addr, usvc->port); + rcu_read_unlock(); *ret_svc = svc; /* If a full entry was requested, check for the additional fields */ @@ -3400,7 +3372,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) mutex_lock(&__ip_vs_mutex); if (cmd == IPVS_CMD_FLUSH) { - ret = ip_vs_flush(net); + ret = ip_vs_flush(net, false); goto out; } else if (cmd == IPVS_CMD_SET_CONFIG) { ret = ip_vs_genl_set_config(net, info->attrs); @@ -3792,13 +3764,14 @@ int __net_init ip_vs_control_net_init(struct net *net) int idx; struct netns_ipvs *ipvs = net_ipvs(net); - rwlock_init(&ipvs->rs_lock); - /* Initialize rs_table */ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) - INIT_LIST_HEAD(&ipvs->rs_table[idx]); + INIT_HLIST_HEAD(&ipvs->rs_table[idx]); INIT_LIST_HEAD(&ipvs->dest_trash); + spin_lock_init(&ipvs->dest_trash_lock); + setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, + (unsigned long) net); atomic_set(&ipvs->ftpsvc_counter, 0); atomic_set(&ipvs->nullsvc_counter, 0); @@ -3828,6 +3801,10 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); + /* Some dest can be in grace period even before cleanup, we have to + * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called. + */ + rcu_barrier(); ip_vs_trash_cleanup(net); ip_vs_stop_estimator(net, &ipvs->tot_stats); ip_vs_control_net_cleanup_sysctl(net); @@ -3873,10 +3850,10 @@ int __init ip_vs_control_init(void) EnterFunction(2); - /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ + /* Initialize svc_table, ip_vs_svc_fwm_table */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - INIT_LIST_HEAD(&ip_vs_svc_table[idx]); - INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); + INIT_HLIST_HEAD(&ip_vs_svc_table[idx]); + INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]); } smp_wmb(); /* Do we really need it now ? */ diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index 7f3b0cc00b7a..ccab120df45e 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -51,7 +51,7 @@ * IPVS DH bucket */ struct ip_vs_dh_bucket { - struct ip_vs_dest *dest; /* real server (cache) */ + struct ip_vs_dest __rcu *dest; /* real server (cache) */ }; /* @@ -64,6 +64,10 @@ struct ip_vs_dh_bucket { #define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS) #define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1) +struct ip_vs_dh_state { + struct ip_vs_dh_bucket buckets[IP_VS_DH_TAB_SIZE]; + struct rcu_head rcu_head; +}; /* * Returns hash value for IPVS DH entry @@ -85,10 +89,9 @@ static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *ad * Get ip_vs_dest associated with supplied parameters. */ static inline struct ip_vs_dest * -ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl, - const union nf_inet_addr *addr) +ip_vs_dh_get(int af, struct ip_vs_dh_state *s, const union nf_inet_addr *addr) { - return (tbl[ip_vs_dh_hashkey(af, addr)]).dest; + return rcu_dereference(s->buckets[ip_vs_dh_hashkey(af, addr)].dest); } @@ -96,25 +99,30 @@ ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl, * Assign all the hash buckets of the specified table with the service. */ static int -ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc) +ip_vs_dh_reassign(struct ip_vs_dh_state *s, struct ip_vs_service *svc) { int i; struct ip_vs_dh_bucket *b; struct list_head *p; struct ip_vs_dest *dest; + bool empty; - b = tbl; + b = &s->buckets[0]; p = &svc->destinations; + empty = list_empty(p); for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { - if (list_empty(p)) { - b->dest = NULL; - } else { + dest = rcu_dereference_protected(b->dest, 1); + if (dest) + ip_vs_dest_put(dest); + if (empty) + RCU_INIT_POINTER(b->dest, NULL); + else { if (p == &svc->destinations) p = p->next; dest = list_entry(p, struct ip_vs_dest, n_list); - atomic_inc(&dest->refcnt); - b->dest = dest; + ip_vs_dest_hold(dest); + RCU_INIT_POINTER(b->dest, dest); p = p->next; } @@ -127,16 +135,18 @@ ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc) /* * Flush all the hash buckets of the specified table. */ -static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl) +static void ip_vs_dh_flush(struct ip_vs_dh_state *s) { int i; struct ip_vs_dh_bucket *b; + struct ip_vs_dest *dest; - b = tbl; + b = &s->buckets[0]; for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { - if (b->dest) { - atomic_dec(&b->dest->refcnt); - b->dest = NULL; + dest = rcu_dereference_protected(b->dest, 1); + if (dest) { + ip_vs_dest_put(dest); + RCU_INIT_POINTER(b->dest, NULL); } b++; } @@ -145,51 +155,46 @@ static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl) static int ip_vs_dh_init_svc(struct ip_vs_service *svc) { - struct ip_vs_dh_bucket *tbl; + struct ip_vs_dh_state *s; /* allocate the DH table for this service */ - tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE, - GFP_KERNEL); - if (tbl == NULL) + s = kzalloc(sizeof(struct ip_vs_dh_state), GFP_KERNEL); + if (s == NULL) return -ENOMEM; - svc->sched_data = tbl; + svc->sched_data = s; IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for " "current service\n", sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); - /* assign the hash buckets with the updated service */ - ip_vs_dh_assign(tbl, svc); + /* assign the hash buckets with current dests */ + ip_vs_dh_reassign(s, svc); return 0; } -static int ip_vs_dh_done_svc(struct ip_vs_service *svc) +static void ip_vs_dh_done_svc(struct ip_vs_service *svc) { - struct ip_vs_dh_bucket *tbl = svc->sched_data; + struct ip_vs_dh_state *s = svc->sched_data; /* got to clean up hash buckets here */ - ip_vs_dh_flush(tbl); + ip_vs_dh_flush(s); /* release the table itself */ - kfree(svc->sched_data); + kfree_rcu(s, rcu_head); IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n", sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); - - return 0; } -static int ip_vs_dh_update_svc(struct ip_vs_service *svc) +static int ip_vs_dh_dest_changed(struct ip_vs_service *svc, + struct ip_vs_dest *dest) { - struct ip_vs_dh_bucket *tbl = svc->sched_data; - - /* got to clean up hash buckets here */ - ip_vs_dh_flush(tbl); + struct ip_vs_dh_state *s = svc->sched_data; /* assign the hash buckets with the updated service */ - ip_vs_dh_assign(tbl, svc); + ip_vs_dh_reassign(s, svc); return 0; } @@ -212,19 +217,20 @@ static struct ip_vs_dest * ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_dest *dest; - struct ip_vs_dh_bucket *tbl; + struct ip_vs_dh_state *s; struct ip_vs_iphdr iph; ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); - tbl = (struct ip_vs_dh_bucket *)svc->sched_data; - dest = ip_vs_dh_get(svc->af, tbl, &iph.daddr); + s = (struct ip_vs_dh_state *) svc->sched_data; + dest = ip_vs_dh_get(svc->af, s, &iph.daddr); if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 || is_overloaded(dest)) { + ip_vs_scheduler_err(svc, "no destination available"); return NULL; } @@ -248,7 +254,8 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler = .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), .init_service = ip_vs_dh_init_svc, .done_service = ip_vs_dh_done_svc, - .update_service = ip_vs_dh_update_svc, + .add_dest = ip_vs_dh_dest_changed, + .del_dest = ip_vs_dh_dest_changed, .schedule = ip_vs_dh_schedule, }; @@ -262,6 +269,7 @@ static int __init ip_vs_dh_init(void) static void __exit ip_vs_dh_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_dh_scheduler); + synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 4f53a5f04437..77c173282f38 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -267,10 +267,12 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, * hopefully it will succeed on the retransmitted * packet. */ + rcu_read_lock(); ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, iph->ihl * 4, start-data, end-start, buf, buf_len); + rcu_read_unlock(); if (ret) { ip_vs_nfct_expect_related(skb, ct, n_cp, IPPROTO_TCP, 0, 0); @@ -480,6 +482,7 @@ static int __init ip_vs_ftp_init(void) int rv; rv = register_pernet_subsys(&ip_vs_ftp_ops); + /* rcu_barrier() is called by netns on error */ return rv; } @@ -489,6 +492,7 @@ static int __init ip_vs_ftp_init(void) static void __exit ip_vs_ftp_exit(void) { unregister_pernet_subsys(&ip_vs_ftp_ops); + /* rcu_barrier() is called by netns */ } diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index fdd89b9564ea..b2cc2528a4df 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -90,11 +90,12 @@ * IP address and its destination server */ struct ip_vs_lblc_entry { - struct list_head list; + struct hlist_node list; int af; /* address family */ union nf_inet_addr addr; /* destination IP address */ - struct ip_vs_dest *dest; /* real server (cache) */ + struct ip_vs_dest __rcu *dest; /* real server (cache) */ unsigned long lastuse; /* last used time */ + struct rcu_head rcu_head; }; @@ -102,12 +103,14 @@ struct ip_vs_lblc_entry { * IPVS lblc hash table */ struct ip_vs_lblc_table { - struct list_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */ + struct rcu_head rcu_head; + struct hlist_head __rcu bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */ + struct timer_list periodic_timer; /* collect stale entries */ atomic_t entries; /* number of entries */ int max_size; /* maximum size of entries */ - struct timer_list periodic_timer; /* collect stale entries */ int rover; /* rover for expire check */ int counter; /* counter for no expire */ + bool dead; }; @@ -129,13 +132,16 @@ static ctl_table vs_vars_table[] = { static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) { - list_del(&en->list); + struct ip_vs_dest *dest; + + hlist_del_rcu(&en->list); /* * We don't kfree dest because it is referred either by its service * or the trash dest list. */ - atomic_dec(&en->dest->refcnt); - kfree(en); + dest = rcu_dereference_protected(en->dest, 1); + ip_vs_dest_put(dest); + kfree_rcu(en, rcu_head); } @@ -165,15 +171,12 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en) { unsigned int hash = ip_vs_lblc_hashkey(en->af, &en->addr); - list_add(&en->list, &tbl->bucket[hash]); + hlist_add_head_rcu(&en->list, &tbl->bucket[hash]); atomic_inc(&tbl->entries); } -/* - * Get ip_vs_lblc_entry associated with supplied parameters. Called under read - * lock - */ +/* Get ip_vs_lblc_entry associated with supplied parameters. */ static inline struct ip_vs_lblc_entry * ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl, const union nf_inet_addr *addr) @@ -181,7 +184,7 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl, unsigned int hash = ip_vs_lblc_hashkey(af, addr); struct ip_vs_lblc_entry *en; - list_for_each_entry(en, &tbl->bucket[hash], list) + hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list) if (ip_vs_addr_equal(af, &en->addr, addr)) return en; @@ -191,7 +194,7 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl, /* * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP - * address to a server. Called under write lock. + * address to a server. Called under spin lock. */ static inline struct ip_vs_lblc_entry * ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, @@ -209,14 +212,20 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, ip_vs_addr_copy(dest->af, &en->addr, daddr); en->lastuse = jiffies; - atomic_inc(&dest->refcnt); - en->dest = dest; + ip_vs_dest_hold(dest); + RCU_INIT_POINTER(en->dest, dest); ip_vs_lblc_hash(tbl, en); - } else if (en->dest != dest) { - atomic_dec(&en->dest->refcnt); - atomic_inc(&dest->refcnt); - en->dest = dest; + } else { + struct ip_vs_dest *old_dest; + + old_dest = rcu_dereference_protected(en->dest, 1); + if (old_dest != dest) { + ip_vs_dest_put(old_dest); + ip_vs_dest_hold(dest); + /* No ordering constraints for refcnt */ + RCU_INIT_POINTER(en->dest, dest); + } } return en; @@ -226,17 +235,22 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, /* * Flush all the entries of the specified table. */ -static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl) +static void ip_vs_lblc_flush(struct ip_vs_service *svc) { - struct ip_vs_lblc_entry *en, *nxt; + struct ip_vs_lblc_table *tbl = svc->sched_data; + struct ip_vs_lblc_entry *en; + struct hlist_node *next; int i; + spin_lock_bh(&svc->sched_lock); + tbl->dead = 1; for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { - list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { + hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { ip_vs_lblc_free(en); atomic_dec(&tbl->entries); } } + spin_unlock_bh(&svc->sched_lock); } static int sysctl_lblc_expiration(struct ip_vs_service *svc) @@ -252,15 +266,16 @@ static int sysctl_lblc_expiration(struct ip_vs_service *svc) static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) { struct ip_vs_lblc_table *tbl = svc->sched_data; - struct ip_vs_lblc_entry *en, *nxt; + struct ip_vs_lblc_entry *en; + struct hlist_node *next; unsigned long now = jiffies; int i, j; for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLC_TAB_MASK; - write_lock(&svc->sched_lock); - list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { + spin_lock(&svc->sched_lock); + hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) { if (time_before(now, en->lastuse + sysctl_lblc_expiration(svc))) @@ -269,7 +284,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) ip_vs_lblc_free(en); atomic_dec(&tbl->entries); } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); } tbl->rover = j; } @@ -293,7 +308,8 @@ static void ip_vs_lblc_check_expire(unsigned long data) unsigned long now = jiffies; int goal; int i, j; - struct ip_vs_lblc_entry *en, *nxt; + struct ip_vs_lblc_entry *en; + struct hlist_node *next; if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { /* do full expiration check */ @@ -314,8 +330,8 @@ static void ip_vs_lblc_check_expire(unsigned long data) for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLC_TAB_MASK; - write_lock(&svc->sched_lock); - list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { + spin_lock(&svc->sched_lock); + hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) { if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) continue; @@ -323,7 +339,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) atomic_dec(&tbl->entries); goal--; } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); if (goal <= 0) break; } @@ -354,11 +370,12 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) * Initialize the hash buckets */ for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { - INIT_LIST_HEAD(&tbl->bucket[i]); + INIT_HLIST_HEAD(&tbl->bucket[i]); } tbl->max_size = IP_VS_LBLC_TAB_SIZE*16; tbl->rover = 0; tbl->counter = 1; + tbl->dead = 0; /* * Hook periodic timer for garbage collection @@ -371,7 +388,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) } -static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) +static void ip_vs_lblc_done_svc(struct ip_vs_service *svc) { struct ip_vs_lblc_table *tbl = svc->sched_data; @@ -379,14 +396,12 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) del_timer_sync(&tbl->periodic_timer); /* got to clean up table entries here */ - ip_vs_lblc_flush(tbl); + ip_vs_lblc_flush(svc); /* release the table itself */ - kfree(tbl); + kfree_rcu(tbl, rcu_head); IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n", sizeof(*tbl)); - - return 0; } @@ -408,7 +423,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc) * The server with weight=0 is quiesced and will not receive any * new connection. */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; if (atomic_read(&dest->weight) > 0) { @@ -423,7 +438,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc) * Find the destination with the least load. */ nextstage: - list_for_each_entry_continue(dest, &svc->destinations, n_list) { + list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -457,7 +472,7 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) { struct ip_vs_dest *d; - list_for_each_entry(d, &svc->destinations, n_list) { + list_for_each_entry_rcu(d, &svc->destinations, n_list) { if (atomic_read(&d->activeconns)*2 < atomic_read(&d->weight)) { return 1; @@ -484,7 +499,6 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* First look in our cache */ - read_lock(&svc->sched_lock); en = ip_vs_lblc_get(svc->af, tbl, &iph.daddr); if (en) { /* We only hold a read lock, but this is atomic */ @@ -499,14 +513,11 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * free up entries from the trash at any time. */ - if (en->dest->flags & IP_VS_DEST_F_AVAILABLE) - dest = en->dest; + dest = rcu_dereference(en->dest); + if ((dest->flags & IP_VS_DEST_F_AVAILABLE) && + atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) + goto out; } - read_unlock(&svc->sched_lock); - - /* If the destination has a weight and is not overloaded, use it */ - if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) - goto out; /* No cache entry or it is invalid, time to schedule */ dest = __ip_vs_lblc_schedule(svc); @@ -516,9 +527,10 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } /* If we fail to create a cache entry, we'll just use the valid dest */ - write_lock(&svc->sched_lock); - ip_vs_lblc_new(tbl, &iph.daddr, dest); - write_unlock(&svc->sched_lock); + spin_lock_bh(&svc->sched_lock); + if (!tbl->dead) + ip_vs_lblc_new(tbl, &iph.daddr, dest); + spin_unlock_bh(&svc->sched_lock); out: IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n", @@ -621,6 +633,7 @@ static void __exit ip_vs_lblc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); unregister_pernet_subsys(&ip_vs_lblc_ops); + synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index c03b6a3ade2f..feb9656eac58 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -89,40 +89,44 @@ */ struct ip_vs_dest_set_elem { struct list_head list; /* list link */ - struct ip_vs_dest *dest; /* destination server */ + struct ip_vs_dest __rcu *dest; /* destination server */ + struct rcu_head rcu_head; }; struct ip_vs_dest_set { atomic_t size; /* set size */ unsigned long lastmod; /* last modified time */ struct list_head list; /* destination list */ - rwlock_t lock; /* lock for this list */ }; -static struct ip_vs_dest_set_elem * -ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) +static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set, + struct ip_vs_dest *dest, bool check) { struct ip_vs_dest_set_elem *e; - list_for_each_entry(e, &set->list, list) { - if (e->dest == dest) - /* already existed */ - return NULL; + if (check) { + list_for_each_entry(e, &set->list, list) { + struct ip_vs_dest *d; + + d = rcu_dereference_protected(e->dest, 1); + if (d == dest) + /* already existed */ + return; + } } e = kmalloc(sizeof(*e), GFP_ATOMIC); if (e == NULL) - return NULL; + return; - atomic_inc(&dest->refcnt); - e->dest = dest; + ip_vs_dest_hold(dest); + RCU_INIT_POINTER(e->dest, dest); - list_add(&e->list, &set->list); + list_add_rcu(&e->list, &set->list); atomic_inc(&set->size); set->lastmod = jiffies; - return e; } static void @@ -131,13 +135,16 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) struct ip_vs_dest_set_elem *e; list_for_each_entry(e, &set->list, list) { - if (e->dest == dest) { + struct ip_vs_dest *d; + + d = rcu_dereference_protected(e->dest, 1); + if (d == dest) { /* HIT */ atomic_dec(&set->size); set->lastmod = jiffies; - atomic_dec(&e->dest->refcnt); - list_del(&e->list); - kfree(e); + ip_vs_dest_put(dest); + list_del_rcu(&e->list); + kfree_rcu(e, rcu_head); break; } } @@ -147,17 +154,18 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) { struct ip_vs_dest_set_elem *e, *ep; - write_lock(&set->lock); list_for_each_entry_safe(e, ep, &set->list, list) { + struct ip_vs_dest *d; + + d = rcu_dereference_protected(e->dest, 1); /* * We don't kfree dest because it is referred either * by its service or by the trash dest list. */ - atomic_dec(&e->dest->refcnt); - list_del(&e->list); - kfree(e); + ip_vs_dest_put(d); + list_del_rcu(&e->list); + kfree_rcu(e, rcu_head); } - write_unlock(&set->lock); } /* get weighted least-connection node in the destination set */ @@ -171,8 +179,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) return NULL; /* select the first destination server, whose weight > 0 */ - list_for_each_entry(e, &set->list, list) { - least = e->dest; + list_for_each_entry_rcu(e, &set->list, list) { + least = rcu_dereference(e->dest); if (least->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -186,8 +194,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) /* find the destination with the weighted least load */ nextstage: - list_for_each_entry(e, &set->list, list) { - dest = e->dest; + list_for_each_entry_continue_rcu(e, &set->list, list) { + dest = rcu_dereference(e->dest); if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -224,7 +232,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) /* select the first destination server, whose weight > 0 */ list_for_each_entry(e, &set->list, list) { - most = e->dest; + most = rcu_dereference_protected(e->dest, 1); if (atomic_read(&most->weight) > 0) { moh = ip_vs_dest_conn_overhead(most); goto nextstage; @@ -234,8 +242,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) /* find the destination with the weighted most load */ nextstage: - list_for_each_entry(e, &set->list, list) { - dest = e->dest; + list_for_each_entry_continue(e, &set->list, list) { + dest = rcu_dereference_protected(e->dest, 1); doh = ip_vs_dest_conn_overhead(dest); /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */ if ((moh * atomic_read(&dest->weight) < @@ -262,11 +270,12 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) * IP address and its destination server set */ struct ip_vs_lblcr_entry { - struct list_head list; + struct hlist_node list; int af; /* address family */ union nf_inet_addr addr; /* destination IP address */ struct ip_vs_dest_set set; /* destination server set */ unsigned long lastuse; /* last used time */ + struct rcu_head rcu_head; }; @@ -274,12 +283,14 @@ struct ip_vs_lblcr_entry { * IPVS lblcr hash table */ struct ip_vs_lblcr_table { - struct list_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */ + struct rcu_head rcu_head; + struct hlist_head __rcu bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */ atomic_t entries; /* number of entries */ int max_size; /* maximum size of entries */ struct timer_list periodic_timer; /* collect stale entries */ int rover; /* rover for expire check */ int counter; /* counter for no expire */ + bool dead; }; @@ -302,9 +313,9 @@ static ctl_table vs_vars_table[] = { static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) { - list_del(&en->list); + hlist_del_rcu(&en->list); ip_vs_dest_set_eraseall(&en->set); - kfree(en); + kfree_rcu(en, rcu_head); } @@ -334,15 +345,12 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en) { unsigned int hash = ip_vs_lblcr_hashkey(en->af, &en->addr); - list_add(&en->list, &tbl->bucket[hash]); + hlist_add_head_rcu(&en->list, &tbl->bucket[hash]); atomic_inc(&tbl->entries); } -/* - * Get ip_vs_lblcr_entry associated with supplied parameters. Called under - * read lock. - */ +/* Get ip_vs_lblcr_entry associated with supplied parameters. */ static inline struct ip_vs_lblcr_entry * ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *addr) @@ -350,7 +358,7 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl, unsigned int hash = ip_vs_lblcr_hashkey(af, addr); struct ip_vs_lblcr_entry *en; - list_for_each_entry(en, &tbl->bucket[hash], list) + hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list) if (ip_vs_addr_equal(af, &en->addr, addr)) return en; @@ -360,7 +368,7 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl, /* * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination - * IP address to a server. Called under write lock. + * IP address to a server. Called under spin lock. */ static inline struct ip_vs_lblcr_entry * ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, @@ -381,14 +389,14 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, /* initialize its dest set */ atomic_set(&(en->set.size), 0); INIT_LIST_HEAD(&en->set.list); - rwlock_init(&en->set.lock); + + ip_vs_dest_set_insert(&en->set, dest, false); ip_vs_lblcr_hash(tbl, en); + return en; } - write_lock(&en->set.lock); - ip_vs_dest_set_insert(&en->set, dest); - write_unlock(&en->set.lock); + ip_vs_dest_set_insert(&en->set, dest, true); return en; } @@ -397,17 +405,21 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, /* * Flush all the entries of the specified table. */ -static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl) +static void ip_vs_lblcr_flush(struct ip_vs_service *svc) { + struct ip_vs_lblcr_table *tbl = svc->sched_data; int i; - struct ip_vs_lblcr_entry *en, *nxt; + struct ip_vs_lblcr_entry *en; + struct hlist_node *next; - /* No locking required, only called during cleanup. */ + spin_lock_bh(&svc->sched_lock); + tbl->dead = 1; for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { - list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { + hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { ip_vs_lblcr_free(en); } } + spin_unlock_bh(&svc->sched_lock); } static int sysctl_lblcr_expiration(struct ip_vs_service *svc) @@ -425,13 +437,14 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) struct ip_vs_lblcr_table *tbl = svc->sched_data; unsigned long now = jiffies; int i, j; - struct ip_vs_lblcr_entry *en, *nxt; + struct ip_vs_lblcr_entry *en; + struct hlist_node *next; for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLCR_TAB_MASK; - write_lock(&svc->sched_lock); - list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { + spin_lock(&svc->sched_lock); + hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) { if (time_after(en->lastuse + sysctl_lblcr_expiration(svc), now)) continue; @@ -439,7 +452,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) ip_vs_lblcr_free(en); atomic_dec(&tbl->entries); } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); } tbl->rover = j; } @@ -463,7 +476,8 @@ static void ip_vs_lblcr_check_expire(unsigned long data) unsigned long now = jiffies; int goal; int i, j; - struct ip_vs_lblcr_entry *en, *nxt; + struct ip_vs_lblcr_entry *en; + struct hlist_node *next; if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { /* do full expiration check */ @@ -484,8 +498,8 @@ static void ip_vs_lblcr_check_expire(unsigned long data) for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLCR_TAB_MASK; - write_lock(&svc->sched_lock); - list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { + spin_lock(&svc->sched_lock); + hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) { if (time_before(now, en->lastuse+ENTRY_TIMEOUT)) continue; @@ -493,7 +507,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data) atomic_dec(&tbl->entries); goal--; } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); if (goal <= 0) break; } @@ -523,11 +537,12 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) * Initialize the hash buckets */ for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { - INIT_LIST_HEAD(&tbl->bucket[i]); + INIT_HLIST_HEAD(&tbl->bucket[i]); } tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16; tbl->rover = 0; tbl->counter = 1; + tbl->dead = 0; /* * Hook periodic timer for garbage collection @@ -540,7 +555,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) } -static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) +static void ip_vs_lblcr_done_svc(struct ip_vs_service *svc) { struct ip_vs_lblcr_table *tbl = svc->sched_data; @@ -548,14 +563,12 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) del_timer_sync(&tbl->periodic_timer); /* got to clean up table entries here */ - ip_vs_lblcr_flush(tbl); + ip_vs_lblcr_flush(svc); /* release the table itself */ - kfree(tbl); + kfree_rcu(tbl, rcu_head); IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n", sizeof(*tbl)); - - return 0; } @@ -577,7 +590,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc) * The server with weight=0 is quiesced and will not receive any * new connection. */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -593,7 +606,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc) * Find the destination with the least load. */ nextstage: - list_for_each_entry_continue(dest, &svc->destinations, n_list) { + list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -627,7 +640,7 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) { struct ip_vs_dest *d; - list_for_each_entry(d, &svc->destinations, n_list) { + list_for_each_entry_rcu(d, &svc->destinations, n_list) { if (atomic_read(&d->activeconns)*2 < atomic_read(&d->weight)) { return 1; @@ -646,7 +659,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_lblcr_table *tbl = svc->sched_data; struct ip_vs_iphdr iph; - struct ip_vs_dest *dest = NULL; + struct ip_vs_dest *dest; struct ip_vs_lblcr_entry *en; ip_vs_fill_iph_addr_only(svc->af, skb, &iph); @@ -654,53 +667,46 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* First look in our cache */ - read_lock(&svc->sched_lock); en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); if (en) { - /* We only hold a read lock, but this is atomic */ en->lastuse = jiffies; /* Get the least loaded destination */ - read_lock(&en->set.lock); dest = ip_vs_dest_set_min(&en->set); - read_unlock(&en->set.lock); /* More than one destination + enough time passed by, cleanup */ if (atomic_read(&en->set.size) > 1 && - time_after(jiffies, en->set.lastmod + + time_after(jiffies, en->set.lastmod + sysctl_lblcr_expiration(svc))) { - struct ip_vs_dest *m; + spin_lock_bh(&svc->sched_lock); + if (atomic_read(&en->set.size) > 1) { + struct ip_vs_dest *m; - write_lock(&en->set.lock); - m = ip_vs_dest_set_max(&en->set); - if (m) - ip_vs_dest_set_erase(&en->set, m); - write_unlock(&en->set.lock); + m = ip_vs_dest_set_max(&en->set); + if (m) + ip_vs_dest_set_erase(&en->set, m); + } + spin_unlock_bh(&svc->sched_lock); } /* If the destination is not overloaded, use it */ - if (dest && !is_overloaded(dest, svc)) { - read_unlock(&svc->sched_lock); + if (dest && !is_overloaded(dest, svc)) goto out; - } /* The cache entry is invalid, time to schedule */ dest = __ip_vs_lblcr_schedule(svc); if (!dest) { ip_vs_scheduler_err(svc, "no destination available"); - read_unlock(&svc->sched_lock); return NULL; } /* Update our cache entry */ - write_lock(&en->set.lock); - ip_vs_dest_set_insert(&en->set, dest); - write_unlock(&en->set.lock); - } - read_unlock(&svc->sched_lock); - - if (dest) + spin_lock_bh(&svc->sched_lock); + if (!tbl->dead) + ip_vs_dest_set_insert(&en->set, dest, true); + spin_unlock_bh(&svc->sched_lock); goto out; + } /* No cache entry, time to schedule */ dest = __ip_vs_lblcr_schedule(svc); @@ -710,9 +716,10 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } /* If we fail to create a cache entry, we'll just use the valid dest */ - write_lock(&svc->sched_lock); - ip_vs_lblcr_new(tbl, &iph.daddr, dest); - write_unlock(&svc->sched_lock); + spin_lock_bh(&svc->sched_lock); + if (!tbl->dead) + ip_vs_lblcr_new(tbl, &iph.daddr, dest); + spin_unlock_bh(&svc->sched_lock); out: IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n", @@ -814,6 +821,7 @@ static void __exit ip_vs_lblcr_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); unregister_pernet_subsys(&ip_vs_lblcr_ops); + synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index f391819c0cca..5128e338a749 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -42,7 +42,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * served, but no new connection is assigned to the server. */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if ((dest->flags & IP_VS_DEST_F_OVERLOAD) || atomic_read(&dest->weight) == 0) continue; @@ -84,6 +84,7 @@ static int __init ip_vs_lc_init(void) static void __exit ip_vs_lc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lc_scheduler); + synchronize_rcu(); } module_init(ip_vs_lc_init); diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index 984d9c137d84..646cfd4baa73 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -75,7 +75,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * new connections. */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD || !atomic_read(&dest->weight)) @@ -133,6 +133,7 @@ static int __init ip_vs_nq_init(void) static void __exit ip_vs_nq_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_nq_scheduler); + synchronize_rcu(); } module_init(ip_vs_nq_init); diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c index 5cf859ccb31b..1a82b29ce8ea 100644 --- a/net/netfilter/ipvs/ip_vs_pe.c +++ b/net/netfilter/ipvs/ip_vs_pe.c @@ -13,20 +13,8 @@ /* IPVS pe list */ static LIST_HEAD(ip_vs_pe); -/* lock for service table */ -static DEFINE_SPINLOCK(ip_vs_pe_lock); - -/* Bind a service with a pe */ -void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe) -{ - svc->pe = pe; -} - -/* Unbind a service from its pe */ -void ip_vs_unbind_pe(struct ip_vs_service *svc) -{ - svc->pe = NULL; -} +/* semaphore for IPVS PEs. */ +static DEFINE_MUTEX(ip_vs_pe_mutex); /* Get pe in the pe list by name */ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) @@ -36,9 +24,8 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__, pe_name); - spin_lock_bh(&ip_vs_pe_lock); - - list_for_each_entry(pe, &ip_vs_pe, n_list) { + rcu_read_lock(); + list_for_each_entry_rcu(pe, &ip_vs_pe, n_list) { /* Test and get the modules atomically */ if (pe->module && !try_module_get(pe->module)) { @@ -47,14 +34,14 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) } if (strcmp(pe_name, pe->name)==0) { /* HIT */ - spin_unlock_bh(&ip_vs_pe_lock); + rcu_read_unlock(); return pe; } if (pe->module) module_put(pe->module); } + rcu_read_unlock(); - spin_unlock_bh(&ip_vs_pe_lock); return NULL; } @@ -83,22 +70,13 @@ int register_ip_vs_pe(struct ip_vs_pe *pe) /* increase the module use count */ ip_vs_use_count_inc(); - spin_lock_bh(&ip_vs_pe_lock); - - if (!list_empty(&pe->n_list)) { - spin_unlock_bh(&ip_vs_pe_lock); - ip_vs_use_count_dec(); - pr_err("%s(): [%s] pe already linked\n", - __func__, pe->name); - return -EINVAL; - } - + mutex_lock(&ip_vs_pe_mutex); /* Make sure that the pe with this name doesn't exist * in the pe list. */ list_for_each_entry(tmp, &ip_vs_pe, n_list) { if (strcmp(tmp->name, pe->name) == 0) { - spin_unlock_bh(&ip_vs_pe_lock); + mutex_unlock(&ip_vs_pe_mutex); ip_vs_use_count_dec(); pr_err("%s(): [%s] pe already existed " "in the system\n", __func__, pe->name); @@ -106,8 +84,8 @@ int register_ip_vs_pe(struct ip_vs_pe *pe) } } /* Add it into the d-linked pe list */ - list_add(&pe->n_list, &ip_vs_pe); - spin_unlock_bh(&ip_vs_pe_lock); + list_add_rcu(&pe->n_list, &ip_vs_pe); + mutex_unlock(&ip_vs_pe_mutex); pr_info("[%s] pe registered.\n", pe->name); @@ -118,17 +96,10 @@ EXPORT_SYMBOL_GPL(register_ip_vs_pe); /* Unregister a pe from the pe list */ int unregister_ip_vs_pe(struct ip_vs_pe *pe) { - spin_lock_bh(&ip_vs_pe_lock); - if (list_empty(&pe->n_list)) { - spin_unlock_bh(&ip_vs_pe_lock); - pr_err("%s(): [%s] pe is not in the list. failed\n", - __func__, pe->name); - return -EINVAL; - } - + mutex_lock(&ip_vs_pe_mutex); /* Remove it from the d-linked pe list */ - list_del(&pe->n_list); - spin_unlock_bh(&ip_vs_pe_lock); + list_del_rcu(&pe->n_list); + mutex_unlock(&ip_vs_pe_mutex); /* decrease the module use count */ ip_vs_use_count_dec(); diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 12475ef88daf..00cc0241ed87 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -172,6 +172,7 @@ static int __init ip_vs_sip_init(void) static void __exit ip_vs_sip_cleanup(void) { unregister_ip_vs_pe(&ip_vs_sip_pe); + synchronize_rcu(); } module_init(ip_vs_sip_init); diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index cd1d7298f7ba..6e14a7b5602f 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -27,9 +27,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (sch == NULL) return 0; net = skb_net(skb); + rcu_read_lock(); if ((sch->type == SCTP_CID_INIT) && - (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, - &iph->daddr, sh->dest))) { + (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->daddr, sh->dest))) { int ignored; if (ip_vs_todrop(net_ipvs(net))) { @@ -37,7 +38,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * It seems that we are very loaded. * We have to drop this packet :( */ - ip_vs_service_put(svc); + rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -49,14 +50,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (!*cpp && ignored <= 0) { if (!ignored) *verdict = ip_vs_leave(svc, skb, pd, iph); - else { - ip_vs_service_put(svc); + else *verdict = NF_DROP; - } + rcu_read_unlock(); return 0; } - ip_vs_service_put(svc); } + rcu_read_unlock(); /* NF_ACCEPT */ return 1; } @@ -994,9 +994,9 @@ static void sctp_state_transition(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, struct ip_vs_proto_data *pd) { - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); set_sctp_state(pd, cp, direction, skb); - spin_unlock(&cp->lock); + spin_unlock_bh(&cp->lock); } static inline __u16 sctp_app_hashkey(__be16 port) @@ -1016,30 +1016,25 @@ static int sctp_register_app(struct net *net, struct ip_vs_app *inc) hash = sctp_app_hashkey(port); - spin_lock_bh(&ipvs->sctp_app_lock); list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &ipvs->sctp_apps[hash]); + list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]); atomic_inc(&pd->appcnt); out: - spin_unlock_bh(&ipvs->sctp_app_lock); return ret; } static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); - spin_lock_bh(&ipvs->sctp_app_lock); atomic_dec(&pd->appcnt); - list_del(&inc->p_list); - spin_unlock_bh(&ipvs->sctp_app_lock); + list_del_rcu(&inc->p_list); } static int sctp_app_conn_bind(struct ip_vs_conn *cp) @@ -1055,12 +1050,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = sctp_app_hashkey(cp->vport); - spin_lock(&ipvs->sctp_app_lock); - list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) { + rcu_read_lock(); + list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&ipvs->sctp_app_lock); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -1076,7 +1071,7 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&ipvs->sctp_app_lock); + rcu_read_unlock(); out: return result; } @@ -1090,7 +1085,6 @@ static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); - spin_lock_init(&ipvs->sctp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, sizeof(sctp_timeouts)); if (!pd->timeout_table) diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 9af653a75825..50a15944c6c1 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -47,9 +47,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, } net = skb_net(skb); /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ + rcu_read_lock(); if (th->syn && - (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, - &iph->daddr, th->dest))) { + (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->daddr, th->dest))) { int ignored; if (ip_vs_todrop(net_ipvs(net))) { @@ -57,7 +58,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * It seems that we are very loaded. * We have to drop this packet :( */ - ip_vs_service_put(svc); + rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -70,14 +71,13 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (!*cpp && ignored <= 0) { if (!ignored) *verdict = ip_vs_leave(svc, skb, pd, iph); - else { - ip_vs_service_put(svc); + else *verdict = NF_DROP; - } + rcu_read_unlock(); return 0; } - ip_vs_service_put(svc); } + rcu_read_unlock(); /* NF_ACCEPT */ return 1; } @@ -557,9 +557,9 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, if (th == NULL) return; - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); set_tcp_state(pd, cp, direction, th); - spin_unlock(&cp->lock); + spin_unlock_bh(&cp->lock); } static inline __u16 tcp_app_hashkey(__be16 port) @@ -580,18 +580,16 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc) hash = tcp_app_hashkey(port); - spin_lock_bh(&ipvs->tcp_app_lock); list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &ipvs->tcp_apps[hash]); + list_add_rcu(&inc->p_list, &ipvs->tcp_apps[hash]); atomic_inc(&pd->appcnt); out: - spin_unlock_bh(&ipvs->tcp_app_lock); return ret; } @@ -599,13 +597,10 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc) static void tcp_unregister_app(struct net *net, struct ip_vs_app *inc) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); - spin_lock_bh(&ipvs->tcp_app_lock); atomic_dec(&pd->appcnt); - list_del(&inc->p_list); - spin_unlock_bh(&ipvs->tcp_app_lock); + list_del_rcu(&inc->p_list); } @@ -624,12 +619,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = tcp_app_hashkey(cp->vport); - spin_lock(&ipvs->tcp_app_lock); - list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) { + rcu_read_lock(); + list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&ipvs->tcp_app_lock); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -646,7 +641,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&ipvs->tcp_app_lock); + rcu_read_unlock(); out: return result; @@ -660,11 +655,11 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) { struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); cp->state = IP_VS_TCP_S_LISTEN; cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN] : tcp_timeouts[IP_VS_TCP_S_LISTEN]); - spin_unlock(&cp->lock); + spin_unlock_bh(&cp->lock); } /* --------------------------------------------- @@ -676,7 +671,6 @@ static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE); - spin_lock_init(&ipvs->tcp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, sizeof(tcp_timeouts)); if (!pd->timeout_table) diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 503a842c90d2..b62a3c0ff9bf 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -44,8 +44,9 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, return 0; } net = skb_net(skb); - svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, - &iph->daddr, uh->dest); + rcu_read_lock(); + svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->daddr, uh->dest); if (svc) { int ignored; @@ -54,7 +55,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * It seems that we are very loaded. * We have to drop this packet :( */ - ip_vs_service_put(svc); + rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -67,14 +68,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (!*cpp && ignored <= 0) { if (!ignored) *verdict = ip_vs_leave(svc, skb, pd, iph); - else { - ip_vs_service_put(svc); + else *verdict = NF_DROP; - } + rcu_read_unlock(); return 0; } - ip_vs_service_put(svc); } + rcu_read_unlock(); /* NF_ACCEPT */ return 1; } @@ -359,19 +359,16 @@ static int udp_register_app(struct net *net, struct ip_vs_app *inc) hash = udp_app_hashkey(port); - - spin_lock_bh(&ipvs->udp_app_lock); list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &ipvs->udp_apps[hash]); + list_add_rcu(&inc->p_list, &ipvs->udp_apps[hash]); atomic_inc(&pd->appcnt); out: - spin_unlock_bh(&ipvs->udp_app_lock); return ret; } @@ -380,12 +377,9 @@ static void udp_unregister_app(struct net *net, struct ip_vs_app *inc) { struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); - struct netns_ipvs *ipvs = net_ipvs(net); - spin_lock_bh(&ipvs->udp_app_lock); atomic_dec(&pd->appcnt); - list_del(&inc->p_list); - spin_unlock_bh(&ipvs->udp_app_lock); + list_del_rcu(&inc->p_list); } @@ -403,12 +397,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = udp_app_hashkey(cp->vport); - spin_lock(&ipvs->udp_app_lock); - list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) { + rcu_read_lock(); + list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&ipvs->udp_app_lock); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -425,7 +419,7 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&ipvs->udp_app_lock); + rcu_read_unlock(); out: return result; @@ -467,7 +461,6 @@ static int __udp_init(struct net *net, struct ip_vs_proto_data *pd) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); - spin_lock_init(&ipvs->udp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, sizeof(udp_timeouts)); if (!pd->timeout_table) diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index c49b388d1085..c35986c793d9 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -35,9 +35,18 @@ static int ip_vs_rr_init_svc(struct ip_vs_service *svc) } -static int ip_vs_rr_update_svc(struct ip_vs_service *svc) +static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest) { - svc->sched_data = &svc->destinations; + struct list_head *p; + + spin_lock_bh(&svc->sched_lock); + p = (struct list_head *) svc->sched_data; + /* dest is already unlinked, so p->prev is not valid but + * p->next is valid, use it to reach previous entry. + */ + if (p == &dest->n_list) + svc->sched_data = p->next->prev; + spin_unlock_bh(&svc->sched_lock); return 0; } @@ -48,36 +57,41 @@ static int ip_vs_rr_update_svc(struct ip_vs_service *svc) static struct ip_vs_dest * ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { - struct list_head *p, *q; - struct ip_vs_dest *dest; + struct list_head *p; + struct ip_vs_dest *dest, *last; + int pass = 0; IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); - write_lock(&svc->sched_lock); - p = (struct list_head *)svc->sched_data; - p = p->next; - q = p; + spin_lock_bh(&svc->sched_lock); + p = (struct list_head *) svc->sched_data; + last = dest = list_entry(p, struct ip_vs_dest, n_list); + do { - /* skip list head */ - if (q == &svc->destinations) { - q = q->next; - continue; + list_for_each_entry_continue_rcu(dest, + &svc->destinations, + n_list) { + if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && + atomic_read(&dest->weight) > 0) + /* HIT */ + goto out; + if (dest == last) + goto stop; } - - dest = list_entry(q, struct ip_vs_dest, n_list); - if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && - atomic_read(&dest->weight) > 0) - /* HIT */ - goto out; - q = q->next; - } while (q != p); - write_unlock(&svc->sched_lock); + pass++; + /* Previous dest could be unlinked, do not loop forever. + * If we stay at head there is no need for 2nd pass. + */ + } while (pass < 2 && p != &svc->destinations); + +stop: + spin_unlock_bh(&svc->sched_lock); ip_vs_scheduler_err(svc, "no destination available"); return NULL; out: - svc->sched_data = q; - write_unlock(&svc->sched_lock); + svc->sched_data = &dest->n_list; + spin_unlock_bh(&svc->sched_lock); IP_VS_DBG_BUF(6, "RR: server %s:%u " "activeconns %d refcnt %d weight %d\n", IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), @@ -94,7 +108,8 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = { .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), .init_service = ip_vs_rr_init_svc, - .update_service = ip_vs_rr_update_svc, + .add_dest = NULL, + .del_dest = ip_vs_rr_del_dest, .schedule = ip_vs_rr_schedule, }; @@ -106,6 +121,7 @@ static int __init ip_vs_rr_init(void) static void __exit ip_vs_rr_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_rr_scheduler); + synchronize_rcu(); } module_init(ip_vs_rr_init); diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index d6bf20d6cdbe..4dbcda6258bc 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -35,8 +35,8 @@ EXPORT_SYMBOL(ip_vs_scheduler_err); */ static LIST_HEAD(ip_vs_schedulers); -/* lock for service table */ -static DEFINE_SPINLOCK(ip_vs_sched_lock); +/* semaphore for schedulers */ +static DEFINE_MUTEX(ip_vs_sched_mutex); /* @@ -47,8 +47,6 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, { int ret; - svc->scheduler = scheduler; - if (scheduler->init_service) { ret = scheduler->init_service(svc); if (ret) { @@ -56,7 +54,7 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, return ret; } } - + rcu_assign_pointer(svc->scheduler, scheduler); return 0; } @@ -64,22 +62,19 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, /* * Unbind a service with its scheduler */ -int ip_vs_unbind_scheduler(struct ip_vs_service *svc) +void ip_vs_unbind_scheduler(struct ip_vs_service *svc, + struct ip_vs_scheduler *sched) { - struct ip_vs_scheduler *sched = svc->scheduler; + struct ip_vs_scheduler *cur_sched; - if (!sched) - return 0; + cur_sched = rcu_dereference_protected(svc->scheduler, 1); + /* This check proves that old 'sched' was installed */ + if (!cur_sched) + return; - if (sched->done_service) { - if (sched->done_service(svc) != 0) { - pr_err("%s(): done error\n", __func__); - return -EINVAL; - } - } - - svc->scheduler = NULL; - return 0; + if (sched->done_service) + sched->done_service(svc); + /* svc->scheduler can not be set to NULL */ } @@ -92,7 +87,7 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name) IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name); - spin_lock_bh(&ip_vs_sched_lock); + mutex_lock(&ip_vs_sched_mutex); list_for_each_entry(sched, &ip_vs_schedulers, n_list) { /* @@ -106,14 +101,14 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name) } if (strcmp(sched_name, sched->name)==0) { /* HIT */ - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); return sched; } if (sched->module) module_put(sched->module); } - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); return NULL; } @@ -153,21 +148,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler) void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) { + struct ip_vs_scheduler *sched; + + sched = rcu_dereference(svc->scheduler); if (svc->fwmark) { IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", - svc->scheduler->name, svc->fwmark, - svc->fwmark, msg); + sched->name, svc->fwmark, svc->fwmark, msg); #ifdef CONFIG_IP_VS_IPV6 } else if (svc->af == AF_INET6) { IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", - svc->scheduler->name, - ip_vs_proto_name(svc->protocol), + sched->name, ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), msg); #endif } else { IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", - svc->scheduler->name, - ip_vs_proto_name(svc->protocol), + sched->name, ip_vs_proto_name(svc->protocol), &svc->addr.ip, ntohs(svc->port), msg); } } @@ -192,10 +187,10 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) /* increase the module use count */ ip_vs_use_count_inc(); - spin_lock_bh(&ip_vs_sched_lock); + mutex_lock(&ip_vs_sched_mutex); if (!list_empty(&scheduler->n_list)) { - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); ip_vs_use_count_dec(); pr_err("%s(): [%s] scheduler already linked\n", __func__, scheduler->name); @@ -208,7 +203,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) */ list_for_each_entry(sched, &ip_vs_schedulers, n_list) { if (strcmp(scheduler->name, sched->name) == 0) { - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); ip_vs_use_count_dec(); pr_err("%s(): [%s] scheduler already existed " "in the system\n", __func__, scheduler->name); @@ -219,7 +214,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) * Add it into the d-linked scheduler list */ list_add(&scheduler->n_list, &ip_vs_schedulers); - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); pr_info("[%s] scheduler registered.\n", scheduler->name); @@ -237,9 +232,9 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) return -EINVAL; } - spin_lock_bh(&ip_vs_sched_lock); + mutex_lock(&ip_vs_sched_mutex); if (list_empty(&scheduler->n_list)) { - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); pr_err("%s(): [%s] scheduler is not in the list. failed\n", __func__, scheduler->name); return -EINVAL; @@ -249,7 +244,7 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) * Remove it from the d-linked scheduler list */ list_del(&scheduler->n_list); - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); /* decrease the module use count */ ip_vs_use_count_dec(); diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index 89ead246ed3d..f3205925359a 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -79,7 +79,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * new connections. */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && atomic_read(&dest->weight) > 0) { least = dest; @@ -94,7 +94,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * Find the destination with the least load. */ nextstage: - list_for_each_entry_continue(dest, &svc->destinations, n_list) { + list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; doh = ip_vs_sed_dest_overhead(dest); @@ -134,6 +134,7 @@ static int __init ip_vs_sed_init(void) static void __exit ip_vs_sed_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_sed_scheduler); + synchronize_rcu(); } module_init(ip_vs_sed_init); diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index e33126994628..0df269d7c99f 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -53,7 +53,7 @@ * IPVS SH bucket */ struct ip_vs_sh_bucket { - struct ip_vs_dest *dest; /* real server (cache) */ + struct ip_vs_dest __rcu *dest; /* real server (cache) */ }; /* @@ -66,6 +66,10 @@ struct ip_vs_sh_bucket { #define IP_VS_SH_TAB_SIZE (1 << IP_VS_SH_TAB_BITS) #define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1) +struct ip_vs_sh_state { + struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE]; + struct rcu_head rcu_head; +}; /* * Returns hash value for IPVS SH entry @@ -87,10 +91,9 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad * Get ip_vs_dest associated with supplied parameters. */ static inline struct ip_vs_dest * -ip_vs_sh_get(int af, struct ip_vs_sh_bucket *tbl, - const union nf_inet_addr *addr) +ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr) { - return (tbl[ip_vs_sh_hashkey(af, addr)]).dest; + return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest); } @@ -98,27 +101,32 @@ ip_vs_sh_get(int af, struct ip_vs_sh_bucket *tbl, * Assign all the hash buckets of the specified table with the service. */ static int -ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc) +ip_vs_sh_reassign(struct ip_vs_sh_state *s, struct ip_vs_service *svc) { int i; struct ip_vs_sh_bucket *b; struct list_head *p; struct ip_vs_dest *dest; int d_count; + bool empty; - b = tbl; + b = &s->buckets[0]; p = &svc->destinations; + empty = list_empty(p); d_count = 0; for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { - if (list_empty(p)) { - b->dest = NULL; - } else { + dest = rcu_dereference_protected(b->dest, 1); + if (dest) + ip_vs_dest_put(dest); + if (empty) + RCU_INIT_POINTER(b->dest, NULL); + else { if (p == &svc->destinations) p = p->next; dest = list_entry(p, struct ip_vs_dest, n_list); - atomic_inc(&dest->refcnt); - b->dest = dest; + ip_vs_dest_hold(dest); + RCU_INIT_POINTER(b->dest, dest); IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n", i, IP_VS_DBG_ADDR(svc->af, &dest->addr), @@ -140,16 +148,18 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc) /* * Flush all the hash buckets of the specified table. */ -static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl) +static void ip_vs_sh_flush(struct ip_vs_sh_state *s) { int i; struct ip_vs_sh_bucket *b; + struct ip_vs_dest *dest; - b = tbl; + b = &s->buckets[0]; for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { - if (b->dest) { - atomic_dec(&b->dest->refcnt); - b->dest = NULL; + dest = rcu_dereference_protected(b->dest, 1); + if (dest) { + ip_vs_dest_put(dest); + RCU_INIT_POINTER(b->dest, NULL); } b++; } @@ -158,51 +168,46 @@ static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl) static int ip_vs_sh_init_svc(struct ip_vs_service *svc) { - struct ip_vs_sh_bucket *tbl; + struct ip_vs_sh_state *s; /* allocate the SH table for this service */ - tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE, - GFP_KERNEL); - if (tbl == NULL) + s = kzalloc(sizeof(struct ip_vs_sh_state), GFP_KERNEL); + if (s == NULL) return -ENOMEM; - svc->sched_data = tbl; + svc->sched_data = s; IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for " "current service\n", sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); - /* assign the hash buckets with the updated service */ - ip_vs_sh_assign(tbl, svc); + /* assign the hash buckets with current dests */ + ip_vs_sh_reassign(s, svc); return 0; } -static int ip_vs_sh_done_svc(struct ip_vs_service *svc) +static void ip_vs_sh_done_svc(struct ip_vs_service *svc) { - struct ip_vs_sh_bucket *tbl = svc->sched_data; + struct ip_vs_sh_state *s = svc->sched_data; /* got to clean up hash buckets here */ - ip_vs_sh_flush(tbl); + ip_vs_sh_flush(s); /* release the table itself */ - kfree(svc->sched_data); + kfree_rcu(s, rcu_head); IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n", sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); - - return 0; } -static int ip_vs_sh_update_svc(struct ip_vs_service *svc) +static int ip_vs_sh_dest_changed(struct ip_vs_service *svc, + struct ip_vs_dest *dest) { - struct ip_vs_sh_bucket *tbl = svc->sched_data; - - /* got to clean up hash buckets here */ - ip_vs_sh_flush(tbl); + struct ip_vs_sh_state *s = svc->sched_data; /* assign the hash buckets with the updated service */ - ip_vs_sh_assign(tbl, svc); + ip_vs_sh_reassign(s, svc); return 0; } @@ -225,15 +230,15 @@ static struct ip_vs_dest * ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_dest *dest; - struct ip_vs_sh_bucket *tbl; + struct ip_vs_sh_state *s; struct ip_vs_iphdr iph; ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); - tbl = (struct ip_vs_sh_bucket *)svc->sched_data; - dest = ip_vs_sh_get(svc->af, tbl, &iph.saddr); + s = (struct ip_vs_sh_state *) svc->sched_data; + dest = ip_vs_sh_get(svc->af, s, &iph.saddr); if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 @@ -262,7 +267,9 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler = .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list), .init_service = ip_vs_sh_init_svc, .done_service = ip_vs_sh_done_svc, - .update_service = ip_vs_sh_update_svc, + .add_dest = ip_vs_sh_dest_changed, + .del_dest = ip_vs_sh_dest_changed, + .upd_dest = ip_vs_sh_dest_changed, .schedule = ip_vs_sh_schedule, }; @@ -276,6 +283,7 @@ static int __init ip_vs_sh_init(void) static void __exit ip_vs_sh_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_sh_scheduler); + synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 44fd10c539ac..8e57077e5540 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -531,9 +531,9 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, if (!ip_vs_sync_conn_needed(ipvs, cp, pkts)) return; - spin_lock(&ipvs->sync_buff_lock); + spin_lock_bh(&ipvs->sync_buff_lock); if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { - spin_unlock(&ipvs->sync_buff_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); return; } @@ -552,7 +552,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, if (!buff) { buff = ip_vs_sync_buff_create_v0(ipvs); if (!buff) { - spin_unlock(&ipvs->sync_buff_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); return; } @@ -590,7 +590,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, sb_queue_tail(ipvs, ms); ms->sync_buff = NULL; } - spin_unlock(&ipvs->sync_buff_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); /* synchronize its controller if it has */ cp = cp->control; @@ -641,9 +641,9 @@ sloop: pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN); } - spin_lock(&ipvs->sync_buff_lock); + spin_lock_bh(&ipvs->sync_buff_lock); if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { - spin_unlock(&ipvs->sync_buff_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); return; } @@ -683,7 +683,7 @@ sloop: if (!buff) { buff = ip_vs_sync_buff_create(ipvs); if (!buff) { - spin_unlock(&ipvs->sync_buff_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); return; } @@ -750,7 +750,7 @@ sloop: } } - spin_unlock(&ipvs->sync_buff_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); control: /* synchronize its controller if it has */ @@ -843,7 +843,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, kfree(param->pe_data); dest = cp->dest; - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE && !(flags & IP_VS_CONN_F_TEMPLATE) && dest) { if (flags & IP_VS_CONN_F_INACTIVE) { @@ -857,24 +857,21 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, flags &= IP_VS_CONN_F_BACKUP_UPD_MASK; flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK; cp->flags = flags; - spin_unlock(&cp->lock); - if (!dest) { - dest = ip_vs_try_bind_dest(cp); - if (dest) - atomic_dec(&dest->refcnt); - } + spin_unlock_bh(&cp->lock); + if (!dest) + ip_vs_try_bind_dest(cp); } else { /* * Find the appropriate destination for the connection. * If it is not found the connection will remain unbound * but still handled. */ + rcu_read_lock(); dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, param->vport, protocol, fwmark, flags); cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); - if (dest) - atomic_dec(&dest->refcnt); + rcu_read_unlock(); if (!cp) { if (param->pe_data) kfree(param->pe_data); @@ -1692,11 +1689,7 @@ static int sync_thread_backup(void *data) break; } - /* disable bottom half, because it accesses the data - shared by softirq while getting/creating conns */ - local_bh_disable(); ip_vs_process_message(tinfo->net, tinfo->buf, len); - local_bh_enable(); } } diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index bc1bfc48a17f..c60a81c4ce9a 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -51,7 +51,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * new connections. */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && atomic_read(&dest->weight) > 0) { least = dest; @@ -66,7 +66,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * Find the destination with the least load. */ nextstage: - list_for_each_entry_continue(dest, &svc->destinations, n_list) { + list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; doh = ip_vs_dest_conn_overhead(dest); @@ -106,6 +106,7 @@ static int __init ip_vs_wlc_init(void) static void __exit ip_vs_wlc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler); + synchronize_rcu(); } module_init(ip_vs_wlc_init); diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 231be7dd547a..0e68555bceb9 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -29,14 +29,45 @@ #include <net/ip_vs.h> +/* The WRR algorithm depends on some caclulations: + * - mw: maximum weight + * - di: weight step, greatest common divisor from all weights + * - cw: current required weight + * As result, all weights are in the [di..mw] range with a step=di. + * + * First, we start with cw = mw and select dests with weight >= cw. + * Then cw is reduced with di and all dests are checked again. + * Last pass should be with cw = di. We have mw/di passes in total: + * + * pass 1: cw = max weight + * pass 2: cw = max weight - di + * pass 3: cw = max weight - 2 * di + * ... + * last pass: cw = di + * + * Weights are supposed to be >= di but we run in parallel with + * weight changes, it is possible some dest weight to be reduced + * below di, bad if it is the only available dest. + * + * So, we modify how mw is calculated, now it is reduced with (di - 1), + * so that last cw is 1 to catch such dests with weight below di: + * pass 1: cw = max weight - (di - 1) + * pass 2: cw = max weight - di - (di - 1) + * pass 3: cw = max weight - 2 * di - (di - 1) + * ... + * last pass: cw = 1 + * + */ + /* * current destination pointer for weighted round-robin scheduling */ struct ip_vs_wrr_mark { - struct list_head *cl; /* current list head */ + struct ip_vs_dest *cl; /* current dest or head */ int cw; /* current weight */ int mw; /* maximum weight */ int di; /* decreasing interval */ + struct rcu_head rcu_head; }; @@ -88,36 +119,41 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc) if (mark == NULL) return -ENOMEM; - mark->cl = &svc->destinations; - mark->cw = 0; - mark->mw = ip_vs_wrr_max_weight(svc); + mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list); mark->di = ip_vs_wrr_gcd_weight(svc); + mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1); + mark->cw = mark->mw; svc->sched_data = mark; return 0; } -static int ip_vs_wrr_done_svc(struct ip_vs_service *svc) +static void ip_vs_wrr_done_svc(struct ip_vs_service *svc) { + struct ip_vs_wrr_mark *mark = svc->sched_data; + /* * Release the mark variable */ - kfree(svc->sched_data); - - return 0; + kfree_rcu(mark, rcu_head); } -static int ip_vs_wrr_update_svc(struct ip_vs_service *svc) +static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc, + struct ip_vs_dest *dest) { struct ip_vs_wrr_mark *mark = svc->sched_data; - mark->cl = &svc->destinations; - mark->mw = ip_vs_wrr_max_weight(svc); + spin_lock_bh(&svc->sched_lock); + mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list); mark->di = ip_vs_wrr_gcd_weight(svc); - if (mark->cw > mark->mw) - mark->cw = 0; + mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1); + if (mark->cw > mark->mw || !mark->cw) + mark->cw = mark->mw; + else if (mark->di > 1) + mark->cw = (mark->cw / mark->di) * mark->di + 1; + spin_unlock_bh(&svc->sched_lock); return 0; } @@ -128,80 +164,79 @@ static int ip_vs_wrr_update_svc(struct ip_vs_service *svc) static struct ip_vs_dest * ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { - struct ip_vs_dest *dest; + struct ip_vs_dest *dest, *last, *stop = NULL; struct ip_vs_wrr_mark *mark = svc->sched_data; - struct list_head *p; + bool last_pass = false, restarted = false; IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); - /* - * This loop will always terminate, because mark->cw in (0, max_weight] - * and at least one server has its weight equal to max_weight. - */ - write_lock(&svc->sched_lock); - p = mark->cl; + spin_lock_bh(&svc->sched_lock); + dest = mark->cl; + /* No available dests? */ + if (mark->mw == 0) + goto err_noavail; + last = dest; + /* Stop only after all dests were checked for weight >= 1 (last pass) */ while (1) { - if (mark->cl == &svc->destinations) { - /* it is at the head of the destination list */ - - if (mark->cl == mark->cl->next) { - /* no dest entry */ - ip_vs_scheduler_err(svc, - "no destination available: " - "no destinations present"); - dest = NULL; - goto out; - } - - mark->cl = svc->destinations.next; - mark->cw -= mark->di; - if (mark->cw <= 0) { - mark->cw = mark->mw; - /* - * Still zero, which means no available servers. - */ - if (mark->cw == 0) { - mark->cl = &svc->destinations; - ip_vs_scheduler_err(svc, - "no destination available"); - dest = NULL; - goto out; - } - } - } else - mark->cl = mark->cl->next; - - if (mark->cl != &svc->destinations) { - /* not at the head of the list */ - dest = list_entry(mark->cl, struct ip_vs_dest, n_list); + list_for_each_entry_continue_rcu(dest, + &svc->destinations, + n_list) { if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && - atomic_read(&dest->weight) >= mark->cw) { - /* got it */ - break; - } + atomic_read(&dest->weight) >= mark->cw) + goto found; + if (dest == stop) + goto err_over; } - - if (mark->cl == p && mark->cw == mark->di) { - /* back to the start, and no dest is found. - It is only possible when all dests are OVERLOADED */ - dest = NULL; - ip_vs_scheduler_err(svc, - "no destination available: " - "all destinations are overloaded"); - goto out; + mark->cw -= mark->di; + if (mark->cw <= 0) { + mark->cw = mark->mw; + /* Stop if we tried last pass from first dest: + * 1. last_pass: we started checks when cw > di but + * then all dests were checked for w >= 1 + * 2. last was head: the first and only traversal + * was for weight >= 1, for all dests. + */ + if (last_pass || + &last->n_list == &svc->destinations) + goto err_over; + restarted = true; + } + last_pass = mark->cw <= mark->di; + if (last_pass && restarted && + &last->n_list != &svc->destinations) { + /* First traversal was for w >= 1 but only + * for dests after 'last', now do the same + * for all dests up to 'last'. + */ + stop = last; } } +found: IP_VS_DBG_BUF(6, "WRR: server %s:%u " "activeconns %d refcnt %d weight %d\n", IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->activeconns), atomic_read(&dest->refcnt), atomic_read(&dest->weight)); + mark->cl = dest; out: - write_unlock(&svc->sched_lock); + spin_unlock_bh(&svc->sched_lock); return dest; + +err_noavail: + mark->cl = dest; + dest = NULL; + ip_vs_scheduler_err(svc, "no destination available"); + goto out; + +err_over: + mark->cl = dest; + dest = NULL; + ip_vs_scheduler_err(svc, "no destination available: " + "all destinations are overloaded"); + goto out; } @@ -212,7 +247,9 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = { .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list), .init_service = ip_vs_wrr_init_svc, .done_service = ip_vs_wrr_done_svc, - .update_service = ip_vs_wrr_update_svc, + .add_dest = ip_vs_wrr_dest_changed, + .del_dest = ip_vs_wrr_dest_changed, + .upd_dest = ip_vs_wrr_dest_changed, .schedule = ip_vs_wrr_schedule, }; @@ -224,6 +261,7 @@ static int __init ip_vs_wrr_init(void) static void __exit ip_vs_wrr_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler); + synchronize_rcu(); } module_init(ip_vs_wrr_init); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index ee6b7a9f1ec2..b75ff6429a04 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -17,6 +17,8 @@ * - not all connections have destination server, for example, * connections in backup server when fwmark is used * - bypass connections use daddr from packet + * - we can use dst without ref while sending in RCU section, we use + * ref when returning NF_ACCEPT for NAT-ed packet via loopback * LOCAL_OUT rules: * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING) * - skb->pkt_type is not set yet @@ -51,39 +53,54 @@ enum { */ IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */ IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */ + IP_VS_RT_MODE_TUNNEL = 32,/* Tunnel mode */ }; +static inline struct ip_vs_dest_dst *ip_vs_dest_dst_alloc(void) +{ + return kmalloc(sizeof(struct ip_vs_dest_dst), GFP_ATOMIC); +} + +static inline void ip_vs_dest_dst_free(struct ip_vs_dest_dst *dest_dst) +{ + kfree(dest_dst); +} + /* * Destination cache to speed up outgoing route lookup */ static inline void -__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst, - u32 dst_cookie) +__ip_vs_dst_set(struct ip_vs_dest *dest, struct ip_vs_dest_dst *dest_dst, + struct dst_entry *dst, u32 dst_cookie) { - struct dst_entry *old_dst; + struct ip_vs_dest_dst *old; + + old = rcu_dereference_protected(dest->dest_dst, + lockdep_is_held(&dest->dst_lock)); - old_dst = dest->dst_cache; - dest->dst_cache = dst; - dest->dst_rtos = rtos; - dest->dst_cookie = dst_cookie; - dst_release(old_dst); + if (dest_dst) { + dest_dst->dst_cache = dst; + dest_dst->dst_cookie = dst_cookie; + } + rcu_assign_pointer(dest->dest_dst, dest_dst); + + if (old) + call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); } -static inline struct dst_entry * -__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) +static inline struct ip_vs_dest_dst * +__ip_vs_dst_check(struct ip_vs_dest *dest) { - struct dst_entry *dst = dest->dst_cache; + struct ip_vs_dest_dst *dest_dst = rcu_dereference(dest->dest_dst); + struct dst_entry *dst; - if (!dst) + if (!dest_dst) return NULL; - if ((dst->obsolete || rtos != dest->dst_rtos) && - dst->ops->check(dst, dest->dst_cookie) == NULL) { - dest->dst_cache = NULL; - dst_release(dst); + dst = dest_dst->dst_cache; + if (dst->obsolete && + dst->ops->check(dst, dest_dst->dst_cookie) == NULL) return NULL; - } - dst_hold(dst); - return dst; + return dest_dst; } static inline bool @@ -104,7 +121,7 @@ __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) /* Get route to daddr, update *saddr, optionally bind route to saddr */ static struct rtable *do_output_route4(struct net *net, __be32 daddr, - u32 rtos, int rt_mode, __be32 *saddr) + int rt_mode, __be32 *saddr) { struct flowi4 fl4; struct rtable *rt; @@ -113,7 +130,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; - fl4.flowi4_tos = rtos; fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? FLOWI_FLAG_KNOWN_NH : 0; @@ -124,7 +140,7 @@ retry: if (PTR_ERR(rt) == -EINVAL && *saddr && rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { *saddr = 0; - flowi4_update_output(&fl4, 0, rtos, daddr, 0); + flowi4_update_output(&fl4, 0, 0, daddr, 0); goto retry; } IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); @@ -132,7 +148,7 @@ retry: } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { ip_rt_put(rt); *saddr = fl4.saddr; - flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr); + flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr); loop++; goto retry; } @@ -141,113 +157,140 @@ retry: } /* Get route to destination or remote server */ -static struct rtable * +static int __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, - __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr) + __be32 daddr, int rt_mode, __be32 *ret_saddr) { struct net *net = dev_net(skb_dst(skb)->dev); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_dest_dst *dest_dst; struct rtable *rt; /* Route to the other host */ struct rtable *ort; /* Original route */ - int local; + struct iphdr *iph; + __be16 df; + int mtu; + int local, noref = 1; if (dest) { - spin_lock(&dest->dst_lock); - if (!(rt = (struct rtable *) - __ip_vs_dst_check(dest, rtos))) { - rt = do_output_route4(net, dest->addr.ip, rtos, - rt_mode, &dest->dst_saddr.ip); + dest_dst = __ip_vs_dst_check(dest); + if (likely(dest_dst)) + rt = (struct rtable *) dest_dst->dst_cache; + else { + dest_dst = ip_vs_dest_dst_alloc(); + spin_lock_bh(&dest->dst_lock); + if (!dest_dst) { + __ip_vs_dst_set(dest, NULL, NULL, 0); + spin_unlock_bh(&dest->dst_lock); + goto err_unreach; + } + rt = do_output_route4(net, dest->addr.ip, rt_mode, + &dest_dst->dst_saddr.ip); if (!rt) { - spin_unlock(&dest->dst_lock); - return NULL; + __ip_vs_dst_set(dest, NULL, NULL, 0); + spin_unlock_bh(&dest->dst_lock); + ip_vs_dest_dst_free(dest_dst); + goto err_unreach; } - __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0); - IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, " - "rtos=%X\n", - &dest->addr.ip, &dest->dst_saddr.ip, - atomic_read(&rt->dst.__refcnt), rtos); + __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); + spin_unlock_bh(&dest->dst_lock); + IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", + &dest->addr.ip, &dest_dst->dst_saddr.ip, + atomic_read(&rt->dst.__refcnt)); } daddr = dest->addr.ip; if (ret_saddr) - *ret_saddr = dest->dst_saddr.ip; - spin_unlock(&dest->dst_lock); + *ret_saddr = dest_dst->dst_saddr.ip; } else { __be32 saddr = htonl(INADDR_ANY); + noref = 0; + /* For such unconfigured boxes avoid many route lookups * for performance reasons because we do not remember saddr */ rt_mode &= ~IP_VS_RT_MODE_CONNECT; - rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr); + rt = do_output_route4(net, daddr, rt_mode, &saddr); if (!rt) - return NULL; + goto err_unreach; if (ret_saddr) *ret_saddr = saddr; } - local = rt->rt_flags & RTCF_LOCAL; + local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0; if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & rt_mode)) { IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n", (rt->rt_flags & RTCF_LOCAL) ? "local":"non-local", &daddr); - ip_rt_put(rt); - return NULL; - } - if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && - !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) { - IP_VS_DBG_RL("Redirect from non-local address %pI4 to local " - "requires NAT method, dest: %pI4\n", - &ip_hdr(skb)->daddr, &daddr); - ip_rt_put(rt); - return NULL; + goto err_put; } - if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) { - IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 " - "to non-local address, dest: %pI4\n", - &ip_hdr(skb)->saddr, &daddr); - ip_rt_put(rt); - return NULL; + iph = ip_hdr(skb); + if (likely(!local)) { + if (unlikely(ipv4_is_loopback(iph->saddr))) { + IP_VS_DBG_RL("Stopping traffic from loopback address " + "%pI4 to non-local address, dest: %pI4\n", + &iph->saddr, &daddr); + goto err_put; + } + } else { + ort = skb_rtable(skb); + if (!(rt_mode & IP_VS_RT_MODE_RDR) && + !(ort->rt_flags & RTCF_LOCAL)) { + IP_VS_DBG_RL("Redirect from non-local address %pI4 to " + "local requires NAT method, dest: %pI4\n", + &iph->daddr, &daddr); + goto err_put; + } + /* skb to local stack, preserve old route */ + if (!noref) + ip_rt_put(rt); + return local; } - return rt; -} - -/* Reroute packet to local IPv4 stack after DNAT */ -static int -__ip_vs_reroute_locally(struct sk_buff *skb) -{ - struct rtable *rt = skb_rtable(skb); - struct net_device *dev = rt->dst.dev; - struct net *net = dev_net(dev); - struct iphdr *iph = ip_hdr(skb); - - if (rt_is_input_route(rt)) { - unsigned long orefdst = skb->_skb_refdst; - - if (ip_route_input(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev)) - return 0; - refdst_drop(orefdst); + if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) { + mtu = dst_mtu(&rt->dst); + df = iph->frag_off & htons(IP_DF); } else { - struct flowi4 fl4 = { - .daddr = iph->daddr, - .saddr = iph->saddr, - .flowi4_tos = RT_TOS(iph->tos), - .flowi4_mark = skb->mark, - }; - - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) - return 0; - if (!(rt->rt_flags & RTCF_LOCAL)) { - ip_rt_put(rt); - return 0; + struct sock *sk = skb->sk; + + mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); + if (mtu < 68) { + IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); + goto err_put; } - /* Drop old route. */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); + ort = skb_rtable(skb); + if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT) + ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); + /* MTU check allowed? */ + df = sysctl_pmtu_disc(ipvs) ? iph->frag_off & htons(IP_DF) : 0; } - return 1; + + /* MTU checking */ + if (unlikely(df && skb->len > mtu && !skb_is_gso(skb))) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + IP_VS_DBG(1, "frag needed for %pI4\n", &iph->saddr); + goto err_put; + } + + skb_dst_drop(skb); + if (noref) { + if (!local) + skb_dst_set_noref_force(skb, &rt->dst); + else + skb_dst_set(skb, dst_clone(&rt->dst)); + } else + skb_dst_set(skb, &rt->dst); + + return local; + +err_put: + if (!noref) + ip_rt_put(rt); + return -1; + +err_unreach: + dst_link_failure(skb); + return -1; } #ifdef CONFIG_IP_VS_IPV6 @@ -294,44 +337,57 @@ out_err: /* * Get route to destination or remote server */ -static struct rt6_info * +static int __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, struct in6_addr *daddr, struct in6_addr *ret_saddr, - int do_xfrm, int rt_mode) + struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode) { struct net *net = dev_net(skb_dst(skb)->dev); + struct ip_vs_dest_dst *dest_dst; struct rt6_info *rt; /* Route to the other host */ struct rt6_info *ort; /* Original route */ struct dst_entry *dst; - int local; + int mtu; + int local, noref = 1; if (dest) { - spin_lock(&dest->dst_lock); - rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0); - if (!rt) { + dest_dst = __ip_vs_dst_check(dest); + if (likely(dest_dst)) + rt = (struct rt6_info *) dest_dst->dst_cache; + else { u32 cookie; + dest_dst = ip_vs_dest_dst_alloc(); + spin_lock_bh(&dest->dst_lock); + if (!dest_dst) { + __ip_vs_dst_set(dest, NULL, NULL, 0); + spin_unlock_bh(&dest->dst_lock); + goto err_unreach; + } dst = __ip_vs_route_output_v6(net, &dest->addr.in6, - &dest->dst_saddr.in6, + &dest_dst->dst_saddr.in6, do_xfrm); if (!dst) { - spin_unlock(&dest->dst_lock); - return NULL; + __ip_vs_dst_set(dest, NULL, NULL, 0); + spin_unlock_bh(&dest->dst_lock); + ip_vs_dest_dst_free(dest_dst); + goto err_unreach; } rt = (struct rt6_info *) dst; cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; - __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie); + __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); + spin_unlock_bh(&dest->dst_lock); IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", - &dest->addr.in6, &dest->dst_saddr.in6, + &dest->addr.in6, &dest_dst->dst_saddr.in6, atomic_read(&rt->dst.__refcnt)); } if (ret_saddr) - *ret_saddr = dest->dst_saddr.in6; - spin_unlock(&dest->dst_lock); + *ret_saddr = dest_dst->dst_saddr.in6; } else { + noref = 0; dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); if (!dst) - return NULL; + goto err_unreach; rt = (struct rt6_info *) dst; } @@ -340,86 +396,137 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, rt_mode)) { IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n", local ? "local":"non-local", daddr); - dst_release(&rt->dst); - return NULL; + goto err_put; } - if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && - !((ort = (struct rt6_info *) skb_dst(skb)) && - __ip_vs_is_local_route6(ort))) { - IP_VS_DBG_RL("Redirect from non-local address %pI6c to local " - "requires NAT method, dest: %pI6c\n", - &ipv6_hdr(skb)->daddr, daddr); - dst_release(&rt->dst); - return NULL; + if (likely(!local)) { + if (unlikely((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && + ipv6_addr_type(&ipv6_hdr(skb)->saddr) & + IPV6_ADDR_LOOPBACK)) { + IP_VS_DBG_RL("Stopping traffic from loopback address " + "%pI6c to non-local address, " + "dest: %pI6c\n", + &ipv6_hdr(skb)->saddr, daddr); + goto err_put; + } + } else { + ort = (struct rt6_info *) skb_dst(skb); + if (!(rt_mode & IP_VS_RT_MODE_RDR) && + !__ip_vs_is_local_route6(ort)) { + IP_VS_DBG_RL("Redirect from non-local address %pI6c " + "to local requires NAT method, " + "dest: %pI6c\n", + &ipv6_hdr(skb)->daddr, daddr); + goto err_put; + } + /* skb to local stack, preserve old route */ + if (!noref) + dst_release(&rt->dst); + return local; } - if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && - ipv6_addr_type(&ipv6_hdr(skb)->saddr) & - IPV6_ADDR_LOOPBACK)) { - IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c " - "to non-local address, dest: %pI6c\n", - &ipv6_hdr(skb)->saddr, daddr); - dst_release(&rt->dst); - return NULL; + + /* MTU checking */ + if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) + mtu = dst_mtu(&rt->dst); + else { + struct sock *sk = skb->sk; + + mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); + if (mtu < IPV6_MIN_MTU) { + IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, + IPV6_MIN_MTU); + goto err_put; + } + ort = (struct rt6_info *) skb_dst(skb); + if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT) + ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); } - return rt; + if (unlikely(__mtu_check_toobig_v6(skb, mtu))) { + if (!skb->dev) + skb->dev = net->loopback_dev; + /* only send ICMP too big on first fragment */ + if (!ipvsh->fragoffs) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr); + goto err_put; + } + + skb_dst_drop(skb); + if (noref) { + if (!local) + skb_dst_set_noref_force(skb, &rt->dst); + else + skb_dst_set(skb, dst_clone(&rt->dst)); + } else + skb_dst_set(skb, &rt->dst); + + return local; + +err_put: + if (!noref) + dst_release(&rt->dst); + return -1; + +err_unreach: + dst_link_failure(skb); + return -1; } #endif -/* - * Release dest->dst_cache before a dest is removed - */ -void -ip_vs_dst_reset(struct ip_vs_dest *dest) +/* return NF_ACCEPT to allow forwarding or other NF_xxx on error */ +static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, + struct ip_vs_conn *cp) { - struct dst_entry *old_dst; + int ret = NF_ACCEPT; + + skb->ipvs_property = 1; + if (unlikely(cp->flags & IP_VS_CONN_F_NFCT)) + ret = ip_vs_confirm_conntrack(skb); + if (ret == NF_ACCEPT) { + nf_reset(skb); + skb_forward_csum(skb); + } + return ret; +} + +/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ +static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, + struct ip_vs_conn *cp, int local) +{ + int ret = NF_STOLEN; - old_dst = dest->dst_cache; - dest->dst_cache = NULL; - dst_release(old_dst); - dest->dst_saddr.ip = 0; + skb->ipvs_property = 1; + if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) + ip_vs_notrack(skb); + else + ip_vs_update_conntrack(skb, cp, 1); + if (!local) { + skb_forward_csum(skb); + NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, + dst_output); + } else + ret = NF_ACCEPT; + return ret; } -#define IP_VS_XMIT_TUNNEL(skb, cp) \ -({ \ - int __ret = NF_ACCEPT; \ - \ - (skb)->ipvs_property = 1; \ - if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT)) \ - __ret = ip_vs_confirm_conntrack(skb); \ - if (__ret == NF_ACCEPT) { \ - nf_reset(skb); \ - skb_forward_csum(skb); \ - } \ - __ret; \ -}) - -#define IP_VS_XMIT_NAT(pf, skb, cp, local) \ -do { \ - (skb)->ipvs_property = 1; \ - if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ - ip_vs_notrack(skb); \ - else \ - ip_vs_update_conntrack(skb, cp, 1); \ - if (local) \ - return NF_ACCEPT; \ - skb_forward_csum(skb); \ - NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ - skb_dst(skb)->dev, dst_output); \ -} while (0) - -#define IP_VS_XMIT(pf, skb, cp, local) \ -do { \ - (skb)->ipvs_property = 1; \ - if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ - ip_vs_notrack(skb); \ - if (local) \ - return NF_ACCEPT; \ - skb_forward_csum(skb); \ - NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ - skb_dst(skb)->dev, dst_output); \ -} while (0) +/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ +static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, + struct ip_vs_conn *cp, int local) +{ + int ret = NF_STOLEN; + + skb->ipvs_property = 1; + if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) + ip_vs_notrack(skb); + if (!local) { + skb_forward_csum(skb); + NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, + dst_output); + } else + ret = NF_ACCEPT; + return ret; +} /* @@ -430,7 +537,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { /* we do not touch skb and do not need pskb ptr */ - IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); + return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); } @@ -443,52 +550,29 @@ int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { - struct rtable *rt; /* Route to the other host */ struct iphdr *iph = ip_hdr(skb); - int mtu; EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos), - IP_VS_RT_MODE_NON_LOCAL, NULL))) - goto tx_error_icmp; - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && - !skb_is_gso(skb)) { - ip_rt_put(rt); - icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); + rcu_read_lock(); + if (__ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL, + NULL) < 0) goto tx_error; - } - /* - * Call ip_send_check because we are not sure it is called - * after ip_defrag. Is copy-on-write needed? - */ - if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) { - ip_rt_put(rt); - return NF_STOLEN; - } - ip_send_check(ip_hdr(skb)); - - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); + ip_send_check(iph); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); + ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; - tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -496,60 +580,27 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, #ifdef CONFIG_IP_VS_IPV6 int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { - struct rt6_info *rt; /* Route to the other host */ - int mtu; - EnterFunction(10); - rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0, - IP_VS_RT_MODE_NON_LOCAL); - if (!rt) - goto tx_error_icmp; - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if (__mtu_check_toobig_v6(skb, mtu)) { - if (!skb->dev) { - struct net *net = dev_net(skb_dst(skb)->dev); - - skb->dev = net->loopback_dev; - } - /* only send ICMP too big on first fragment */ - if (!iph->fragoffs) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - dst_release(&rt->dst); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); + rcu_read_lock(); + if (__ip_vs_get_out_rt_v6(skb, NULL, &ipvsh->daddr.in6, NULL, + ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0) goto tx_error; - } - - /* - * Call ip_send_check because we are not sure it is called - * after ip_defrag. Is copy-on-write needed? - */ - skb = skb_share_check(skb, GFP_ATOMIC); - if (unlikely(skb == NULL)) { - dst_release(&rt->dst); - return NF_STOLEN; - } - - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); + ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; - tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -564,29 +615,30 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { struct rtable *rt; /* Route to the other host */ - int mtu; - struct iphdr *iph = ip_hdr(skb); - int local; + int local, rc, was_input; EnterFunction(10); + rcu_read_lock(); /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { __be16 _pt, *p; - p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt); + + p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt); if (p == NULL) goto tx_error; ip_vs_conn_fill_cport(cp, *p); IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } - if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(iph->tos), - IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_RDR, NULL))) - goto tx_error_icmp; - local = rt->rt_flags & RTCF_LOCAL; + was_input = rt_is_input_route(skb_rtable(skb)); + local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_RDR, NULL); + if (local < 0) + goto tx_error; + rt = skb_rtable(skb); /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed @@ -600,57 +652,31 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " "stopping DNAT to local address"); - goto tx_error_put; + goto tx_error; } } #endif /* From world but DNAT to loopback address? */ - if (local && ipv4_is_loopback(cp->daddr.ip) && - rt_is_input_route(skb_rtable(skb))) { + if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) { IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " "stopping DNAT to loopback address"); - goto tx_error_put; - } - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && - !skb_is_gso(skb)) { - icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); - IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, - "ip_vs_nat_xmit(): frag needed for"); - goto tx_error_put; + goto tx_error; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, sizeof(struct iphdr))) - goto tx_error_put; + goto tx_error; if (skb_cow(skb, rt->dst.dev->hard_header_len)) - goto tx_error_put; + goto tx_error; /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh)) - goto tx_error_put; + goto tx_error; ip_hdr(skb)->daddr = cp->daddr.ip; ip_send_check(ip_hdr(skb)); - if (!local) { - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - } else { - ip_rt_put(rt); - /* - * Some IPv4 replies get local address from routes, - * not from iph, so while we DNAT after routing - * we need this second input/output route. - */ - if (!__ip_vs_reroute_locally(skb)) - goto tx_error; - } - IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length @@ -660,49 +686,48 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); + rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); + rcu_read_unlock(); LeaveFunction(10); - return NF_STOLEN; + return rc; - tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; - tx_error_put: - ip_rt_put(rt); - goto tx_error; } #ifdef CONFIG_IP_VS_IPV6 int ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { struct rt6_info *rt; /* Route to the other host */ - int mtu; - int local; + int local, rc; EnterFunction(10); + rcu_read_lock(); /* check if it is a connection of no-client-port */ - if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) { + if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) { __be16 _pt, *p; - p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt); + p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt); if (p == NULL) goto tx_error; ip_vs_conn_fill_cport(cp, *p); IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } - if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, - 0, (IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_RDR)))) - goto tx_error_icmp; - local = __ip_vs_is_local_route6(rt); + local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, + ipvsh, 0, + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_RDR); + if (local < 0) + goto tx_error; + rt = (struct rt6_info *) skb_dst(skb); /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed @@ -716,7 +741,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0, "ip_vs_nat_xmit_v6(): " "stopping DNAT to local address"); - goto tx_error_put; + goto tx_error; } } #endif @@ -727,46 +752,21 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, "ip_vs_nat_xmit_v6(): " "stopping DNAT to loopback address"); - goto tx_error_put; - } - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if (__mtu_check_toobig_v6(skb, mtu)) { - if (!skb->dev) { - struct net *net = dev_net(skb_dst(skb)->dev); - - skb->dev = net->loopback_dev; - } - /* only send ICMP too big on first fragment */ - if (!iph->fragoffs) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0, - "ip_vs_nat_xmit_v6(): frag needed for"); - goto tx_error_put; + goto tx_error; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) - goto tx_error_put; + goto tx_error; if (skb_cow(skb, rt->dst.dev->hard_header_len)) - goto tx_error_put; + goto tx_error; /* mangle the packet */ - if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph)) + if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh)) goto tx_error; ipv6_hdr(skb)->daddr = cp->daddr.in6; - if (!local || !skb->dev) { - /* drop the old route when skb is not shared */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - } else { - /* destined to loopback, do we need to change route? */ - dst_release(&rt->dst); - } - IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length @@ -776,20 +776,17 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); + rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); + rcu_read_unlock(); LeaveFunction(10); - return NF_STOLEN; + return rc; -tx_error_icmp: - dst_link_failure(skb); tx_error: LeaveFunction(10); kfree_skb(skb); + rcu_read_unlock(); return NF_STOLEN; -tx_error_put: - dst_release(&rt->dst); - goto tx_error; } #endif @@ -826,56 +823,40 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, __be16 df; struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ - int mtu; - int ret; + int ret, local; EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(tos), IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_CONNECT, - &saddr))) - goto tx_error_icmp; - if (rt->rt_flags & RTCF_LOCAL) { - ip_rt_put(rt); - IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); + rcu_read_lock(); + local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_CONNECT | + IP_VS_RT_MODE_TUNNEL, &saddr); + if (local < 0) + goto tx_error; + if (local) { + rcu_read_unlock(); + return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); } + rt = skb_rtable(skb); tdev = rt->dst.dev; - mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); - if (mtu < 68) { - IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); - goto tx_error_put; - } - if (rt_is_output_route(skb_rtable(skb))) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - /* Copy DF, reset fragment offset and MF */ df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0; - if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) { - icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); - goto tx_error_put; - } - /* * Okay, now see if we can stuff it in the buffer as-is. */ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); - if (skb_headroom(skb) < max_headroom - || skb_cloned(skb) || skb_shared(skb)) { + if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); - if (!new_skb) { - ip_rt_put(rt); - kfree_skb(skb); - IP_VS_ERR_RL("%s(): no memory\n", __func__); - return NF_STOLEN; - } + + if (!new_skb) + goto tx_error; consume_skb(skb); skb = new_skb; old_iph = ip_hdr(skb); @@ -890,10 +871,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - /* * Push down and install the IPIP header. */ @@ -911,25 +888,22 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - ret = IP_VS_XMIT_TUNNEL(skb, cp); + ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) ip_local_out(skb); else if (ret == NF_DROP) kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; - tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; -tx_error_put: - ip_rt_put(rt); - goto tx_error; } #ifdef CONFIG_IP_VS_IPV6 @@ -943,60 +917,37 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ipv6hdr *old_iph = ipv6_hdr(skb); struct ipv6hdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ - int mtu; - int ret; + int ret, local; EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, - &saddr, 1, (IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL)))) - goto tx_error_icmp; - if (__ip_vs_is_local_route6(rt)) { - dst_release(&rt->dst); - IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); + rcu_read_lock(); + local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, + &saddr, ipvsh, 1, + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_TUNNEL); + if (local < 0) + goto tx_error; + if (local) { + rcu_read_unlock(); + return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); } + rt = (struct rt6_info *) skb_dst(skb); tdev = rt->dst.dev; - mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); - if (mtu < IPV6_MIN_MTU) { - IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, - IPV6_MIN_MTU); - goto tx_error_put; - } - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - - /* MTU checking: Notice that 'mtu' have been adjusted before hand */ - if (__mtu_check_toobig_v6(skb, mtu)) { - if (!skb->dev) { - struct net *net = dev_net(skb_dst(skb)->dev); - - skb->dev = net->loopback_dev; - } - /* only send ICMP too big on first fragment */ - if (!ipvsh->fragoffs) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); - goto tx_error_put; - } - /* * Okay, now see if we can stuff it in the buffer as-is. */ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); - if (skb_headroom(skb) < max_headroom - || skb_cloned(skb) || skb_shared(skb)) { + if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); - if (!new_skb) { - dst_release(&rt->dst); - kfree_skb(skb); - IP_VS_ERR_RL("%s(): no memory\n", __func__); - return NF_STOLEN; - } + + if (!new_skb) + goto tx_error; consume_skb(skb); skb = new_skb; old_iph = ipv6_hdr(skb); @@ -1008,10 +959,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - /* * Push down and install the IPIP header. */ @@ -1029,25 +976,22 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - ret = IP_VS_XMIT_TUNNEL(skb, cp); + ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) ip6_local_out(skb); else if (ret == NF_DROP) kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; -tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; -tx_error_put: - dst_release(&rt->dst); - goto tx_error; } #endif @@ -1060,59 +1004,36 @@ int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { - struct rtable *rt; /* Route to the other host */ - struct iphdr *iph = ip_hdr(skb); - int mtu; + int local; EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(iph->tos), - IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_KNOWN_NH, NULL))) - goto tx_error_icmp; - if (rt->rt_flags & RTCF_LOCAL) { - ip_rt_put(rt); - IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); - } - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu && - !skb_is_gso(skb)) { - icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); - ip_rt_put(rt); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); + rcu_read_lock(); + local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_KNOWN_NH, NULL); + if (local < 0) goto tx_error; + if (local) { + rcu_read_unlock(); + return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); } - /* - * Call ip_send_check because we are not sure it is called - * after ip_defrag. Is copy-on-write needed? - */ - if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) { - ip_rt_put(rt); - return NF_STOLEN; - } ip_send_check(ip_hdr(skb)); - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); + ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; - tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -1120,64 +1041,36 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, #ifdef CONFIG_IP_VS_IPV6 int ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { - struct rt6_info *rt; /* Route to the other host */ - int mtu; + int local; EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, - 0, (IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL)))) - goto tx_error_icmp; - if (__ip_vs_is_local_route6(rt)) { - dst_release(&rt->dst); - IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); - } - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if (__mtu_check_toobig_v6(skb, mtu)) { - if (!skb->dev) { - struct net *net = dev_net(skb_dst(skb)->dev); - - skb->dev = net->loopback_dev; - } - /* only send ICMP too big on first fragment */ - if (!iph->fragoffs) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - dst_release(&rt->dst); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); + rcu_read_lock(); + local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, + ipvsh, 0, + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL); + if (local < 0) goto tx_error; + if (local) { + rcu_read_unlock(); + return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); } - /* - * Call ip_send_check because we are not sure it is called - * after ip_defrag. Is copy-on-write needed? - */ - skb = skb_share_check(skb, GFP_ATOMIC); - if (unlikely(skb == NULL)) { - dst_release(&rt->dst); - return NF_STOLEN; - } - - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); + ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; -tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -1194,10 +1087,9 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { struct rtable *rt; /* Route to the other host */ - int mtu; int rc; int local; - int rt_mode; + int rt_mode, was_input; EnterFunction(10); @@ -1217,16 +1109,17 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* * mangle and send the packet here (only for VS/NAT) */ + was_input = rt_is_input_route(skb_rtable(skb)); /* LOCALNODE from FORWARD hook is not supported */ rt_mode = (hooknum != NF_INET_FORWARD) ? IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; - if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(ip_hdr(skb)->tos), - rt_mode, NULL))) - goto tx_error_icmp; - local = rt->rt_flags & RTCF_LOCAL; + rcu_read_lock(); + local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, rt_mode, NULL); + if (local < 0) + goto tx_error; + rt = skb_rtable(skb); /* * Avoid duplicate tuple in reply direction for NAT traffic @@ -1241,82 +1134,51 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG(10, "%s(): " "stopping DNAT to local address %pI4\n", __func__, &cp->daddr.ip); - goto tx_error_put; + goto tx_error; } } #endif /* From world but DNAT to loopback address? */ - if (local && ipv4_is_loopback(cp->daddr.ip) && - rt_is_input_route(skb_rtable(skb))) { + if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) { IP_VS_DBG(1, "%s(): " "stopping DNAT to loopback %pI4\n", __func__, &cp->daddr.ip); - goto tx_error_put; - } - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) && - !skb_is_gso(skb)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); - goto tx_error_put; + goto tx_error; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, offset)) - goto tx_error_put; + goto tx_error; if (skb_cow(skb, rt->dst.dev->hard_header_len)) - goto tx_error_put; + goto tx_error; ip_vs_nat_icmp(skb, pp, cp, 0); - if (!local) { - /* drop the old route when skb is not shared */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - } else { - ip_rt_put(rt); - /* - * Some IPv4 replies get local address from routes, - * not from iph, so while we DNAT after routing - * we need this second input/output route. - */ - if (!__ip_vs_reroute_locally(skb)) - goto tx_error; - } - /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); - - rc = NF_STOLEN; + rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); + rcu_read_unlock(); goto out; - tx_error_icmp: - dst_link_failure(skb); tx_error: - dev_kfree_skb(skb); + kfree_skb(skb); + rcu_read_unlock(); rc = NF_STOLEN; out: LeaveFunction(10); return rc; - tx_error_put: - ip_rt_put(rt); - goto tx_error; } #ifdef CONFIG_IP_VS_IPV6 int ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset, unsigned int hooknum, - struct ip_vs_iphdr *iph) + struct ip_vs_iphdr *ipvsh) { struct rt6_info *rt; /* Route to the other host */ - int mtu; int rc; int local; int rt_mode; @@ -1328,7 +1190,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, translate address/port back */ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { if (cp->packet_xmit) - rc = cp->packet_xmit(skb, cp, pp, iph); + rc = cp->packet_xmit(skb, cp, pp, ipvsh); else rc = NF_ACCEPT; /* do not touch skb anymore */ @@ -1344,11 +1206,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, rt_mode = (hooknum != NF_INET_FORWARD) ? IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; - if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, - 0, rt_mode))) - goto tx_error_icmp; - - local = __ip_vs_is_local_route6(rt); + rcu_read_lock(); + local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, + ipvsh, 0, rt_mode); + if (local < 0) + goto tx_error; + rt = (struct rt6_info *) skb_dst(skb); /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed @@ -1362,7 +1225,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG(10, "%s(): " "stopping DNAT to local address %pI6\n", __func__, &cp->daddr.in6); - goto tx_error_put; + goto tx_error; } } #endif @@ -1373,60 +1236,31 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG(1, "%s(): " "stopping DNAT to loopback %pI6\n", __func__, &cp->daddr.in6); - goto tx_error_put; - } - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if (__mtu_check_toobig_v6(skb, mtu)) { - if (!skb->dev) { - struct net *net = dev_net(skb_dst(skb)->dev); - - skb->dev = net->loopback_dev; - } - /* only send ICMP too big on first fragment */ - if (!iph->fragoffs) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); - goto tx_error_put; + goto tx_error; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, offset)) - goto tx_error_put; + goto tx_error; if (skb_cow(skb, rt->dst.dev->hard_header_len)) - goto tx_error_put; + goto tx_error; ip_vs_nat_icmp_v6(skb, pp, cp, 0); - if (!local || !skb->dev) { - /* drop the old route when skb is not shared */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - } else { - /* destined to loopback, do we need to change route? */ - dst_release(&rt->dst); - } - /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); - - rc = NF_STOLEN; + rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); + rcu_read_unlock(); goto out; -tx_error_icmp: - dst_link_failure(skb); tx_error: - dev_kfree_skb(skb); + kfree_skb(skb); + rcu_read_unlock(); rc = NF_STOLEN; out: LeaveFunction(10); return rc; -tx_error_put: - dst_release(&rt->dst); - goto tx_error; } #endif diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 94b4b9853f60..a0b1c5c23d1c 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -353,7 +353,7 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct, /* rcu_read_lock()ed by nf_hook_slow */ helper = rcu_dereference(help->helper); - nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, + nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, "nf_ct_%s: dropping packet: %pV ", helper->name, &vaf); va_end(args); diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index ba65b2041eb4..a99b6c3427b0 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -456,7 +456,8 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, out_invalid: if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg); + nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL, + NULL, msg); return false; } @@ -542,13 +543,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid packet ignored "); return NF_ACCEPT; case CT_DCCP_INVALID: spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid state transition "); return -NF_ACCEPT; } @@ -613,7 +614,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl, out_invalid: if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg); + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, msg); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 83876e9877f1..f021a2076c87 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -720,7 +720,7 @@ static bool tcp_in_window(const struct nf_conn *ct, tn->tcp_be_liberal) res = true; if (!res && LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? after(end, sender->td_end - receiver->td_maxwin - 1) ? @@ -772,7 +772,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); if (th == NULL) { if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: short packet "); return -NF_ACCEPT; } @@ -780,7 +780,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: truncated/malformed packet "); return -NF_ACCEPT; } @@ -793,7 +793,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: bad TCP checksum "); return -NF_ACCEPT; } @@ -802,7 +802,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH)); if (!tcp_valid_flags[tcpflags]) { if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid TCP flag combination "); return -NF_ACCEPT; } @@ -949,7 +949,7 @@ static int tcp_packet(struct nf_conn *ct, } spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid packet ignored in " "state %s ", tcp_conntrack_names[old_state]); return NF_ACCEPT; @@ -959,7 +959,7 @@ static int tcp_packet(struct nf_conn *ct, dir, get_conntrack_index(th), old_state); spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; case TCP_CONNTRACK_CLOSE: @@ -969,8 +969,8 @@ static int tcp_packet(struct nf_conn *ct, /* Invalid RST */ spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: invalid RST "); + nf_log_packet(net, pf, 0, skb, NULL, NULL, + NULL, "nf_ct_tcp: invalid RST "); return -NF_ACCEPT; } if (index == TCP_RST_SET diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 59623cc56e8d..fee43228e115 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -119,7 +119,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: short packet "); return -NF_ACCEPT; } @@ -127,7 +127,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, /* Truncated/malformed packets */ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: truncated/malformed packet "); return -NF_ACCEPT; } @@ -143,7 +143,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: bad UDP checksum "); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index ca969f6273f7..2750e6c69f82 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -131,7 +131,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: short packet "); return -NF_ACCEPT; } @@ -141,7 +141,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, cscov = udplen; else if (cscov < sizeof(*hdr) || cscov > udplen) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: invalid checksum coverage "); return -NF_ACCEPT; } @@ -149,7 +149,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, /* UDPLITE mandates checksums */ if (!hdr->check) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: checksum missing "); return -NF_ACCEPT; } @@ -159,7 +159,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, pf)) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: bad UDPLite checksum "); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 6c69fbdb8361..ebb67d33bd63 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -572,6 +572,7 @@ static int __init nf_conntrack_standalone_init(void) register_net_sysctl(&init_net, "net", nf_ct_netfilter_table); if (!nf_ct_netfilter_header) { pr_err("nf_conntrack: can't register to sysctl.\n"); + ret = -ENOMEM; goto out_sysctl; } #endif diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 9e312695c818..388656d5a9ec 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -16,7 +16,6 @@ #define NF_LOG_PREFIXLEN 128 #define NFLOGGER_NAME_LEN 64 -static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); @@ -32,13 +31,46 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) return NULL; } +void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger) +{ + const struct nf_logger *log; + + if (pf == NFPROTO_UNSPEC) + return; + + mutex_lock(&nf_log_mutex); + log = rcu_dereference_protected(net->nf.nf_loggers[pf], + lockdep_is_held(&nf_log_mutex)); + if (log == NULL) + rcu_assign_pointer(net->nf.nf_loggers[pf], logger); + + mutex_unlock(&nf_log_mutex); +} +EXPORT_SYMBOL(nf_log_set); + +void nf_log_unset(struct net *net, const struct nf_logger *logger) +{ + int i; + const struct nf_logger *log; + + mutex_lock(&nf_log_mutex); + for (i = 0; i < NFPROTO_NUMPROTO; i++) { + log = rcu_dereference_protected(net->nf.nf_loggers[i], + lockdep_is_held(&nf_log_mutex)); + if (log == logger) + RCU_INIT_POINTER(net->nf.nf_loggers[i], NULL); + } + mutex_unlock(&nf_log_mutex); + synchronize_rcu(); +} +EXPORT_SYMBOL(nf_log_unset); + /* return EEXIST if the same logger is registered, 0 on success. */ int nf_log_register(u_int8_t pf, struct nf_logger *logger) { - const struct nf_logger *llog; int i; - if (pf >= ARRAY_SIZE(nf_loggers)) + if (pf >= ARRAY_SIZE(init_net.nf.nf_loggers)) return -EINVAL; for (i = 0; i < ARRAY_SIZE(logger->list); i++) @@ -52,10 +84,6 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) } else { /* register at end of list to honor first register win */ list_add_tail(&logger->list[pf], &nf_loggers_l[pf]); - llog = rcu_dereference_protected(nf_loggers[pf], - lockdep_is_held(&nf_log_mutex)); - if (llog == NULL) - rcu_assign_pointer(nf_loggers[pf], logger); } mutex_unlock(&nf_log_mutex); @@ -66,49 +94,43 @@ EXPORT_SYMBOL(nf_log_register); void nf_log_unregister(struct nf_logger *logger) { - const struct nf_logger *c_logger; int i; mutex_lock(&nf_log_mutex); - for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) { - c_logger = rcu_dereference_protected(nf_loggers[i], - lockdep_is_held(&nf_log_mutex)); - if (c_logger == logger) - RCU_INIT_POINTER(nf_loggers[i], NULL); + for (i = 0; i < NFPROTO_NUMPROTO; i++) list_del(&logger->list[i]); - } mutex_unlock(&nf_log_mutex); - - synchronize_rcu(); } EXPORT_SYMBOL(nf_log_unregister); -int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger) +int nf_log_bind_pf(struct net *net, u_int8_t pf, + const struct nf_logger *logger) { - if (pf >= ARRAY_SIZE(nf_loggers)) + if (pf >= ARRAY_SIZE(net->nf.nf_loggers)) return -EINVAL; mutex_lock(&nf_log_mutex); if (__find_logger(pf, logger->name) == NULL) { mutex_unlock(&nf_log_mutex); return -ENOENT; } - rcu_assign_pointer(nf_loggers[pf], logger); + rcu_assign_pointer(net->nf.nf_loggers[pf], logger); mutex_unlock(&nf_log_mutex); return 0; } EXPORT_SYMBOL(nf_log_bind_pf); -void nf_log_unbind_pf(u_int8_t pf) +void nf_log_unbind_pf(struct net *net, u_int8_t pf) { - if (pf >= ARRAY_SIZE(nf_loggers)) + if (pf >= ARRAY_SIZE(net->nf.nf_loggers)) return; mutex_lock(&nf_log_mutex); - RCU_INIT_POINTER(nf_loggers[pf], NULL); + RCU_INIT_POINTER(net->nf.nf_loggers[pf], NULL); mutex_unlock(&nf_log_mutex); } EXPORT_SYMBOL(nf_log_unbind_pf); -void nf_log_packet(u_int8_t pf, +void nf_log_packet(struct net *net, + u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -121,7 +143,7 @@ void nf_log_packet(u_int8_t pf, const struct nf_logger *logger; rcu_read_lock(); - logger = rcu_dereference(nf_loggers[pf]); + logger = rcu_dereference(net->nf.nf_loggers[pf]); if (logger) { va_start(args, fmt); vsnprintf(prefix, sizeof(prefix), fmt, args); @@ -135,9 +157,11 @@ EXPORT_SYMBOL(nf_log_packet); #ifdef CONFIG_PROC_FS static void *seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq_file_net(seq); + mutex_lock(&nf_log_mutex); - if (*pos >= ARRAY_SIZE(nf_loggers)) + if (*pos >= ARRAY_SIZE(net->nf.nf_loggers)) return NULL; return pos; @@ -145,9 +169,11 @@ static void *seq_start(struct seq_file *seq, loff_t *pos) static void *seq_next(struct seq_file *s, void *v, loff_t *pos) { + struct net *net = seq_file_net(s); + (*pos)++; - if (*pos >= ARRAY_SIZE(nf_loggers)) + if (*pos >= ARRAY_SIZE(net->nf.nf_loggers)) return NULL; return pos; @@ -164,8 +190,9 @@ static int seq_show(struct seq_file *s, void *v) const struct nf_logger *logger; struct nf_logger *t; int ret; + struct net *net = seq_file_net(s); - logger = rcu_dereference_protected(nf_loggers[*pos], + logger = rcu_dereference_protected(net->nf.nf_loggers[*pos], lockdep_is_held(&nf_log_mutex)); if (!logger) @@ -199,7 +226,8 @@ static const struct seq_operations nflog_seq_ops = { static int nflog_open(struct inode *inode, struct file *file) { - return seq_open(file, &nflog_seq_ops); + return seq_open_net(inode, file, &nflog_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations nflog_file_ops = { @@ -207,7 +235,7 @@ static const struct file_operations nflog_file_ops = { .open = nflog_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; @@ -216,7 +244,6 @@ static const struct file_operations nflog_file_ops = { #ifdef CONFIG_SYSCTL static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; -static struct ctl_table_header *nf_log_dir_header; static int nf_log_proc_dostring(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -226,6 +253,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write, size_t size = *lenp; int r = 0; int tindex = (unsigned long)table->extra1; + struct net *net = current->nsproxy->net_ns; if (write) { if (size > sizeof(buf)) @@ -234,7 +262,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write, return -EFAULT; if (!strcmp(buf, "NONE")) { - nf_log_unbind_pf(tindex); + nf_log_unbind_pf(net, tindex); return 0; } mutex_lock(&nf_log_mutex); @@ -243,11 +271,11 @@ static int nf_log_proc_dostring(ctl_table *table, int write, mutex_unlock(&nf_log_mutex); return -ENOENT; } - rcu_assign_pointer(nf_loggers[tindex], logger); + rcu_assign_pointer(net->nf.nf_loggers[tindex], logger); mutex_unlock(&nf_log_mutex); } else { mutex_lock(&nf_log_mutex); - logger = rcu_dereference_protected(nf_loggers[tindex], + logger = rcu_dereference_protected(net->nf.nf_loggers[tindex], lockdep_is_held(&nf_log_mutex)); if (!logger) table->data = "NONE"; @@ -260,49 +288,111 @@ static int nf_log_proc_dostring(ctl_table *table, int write, return r; } -static __init int netfilter_log_sysctl_init(void) +static int netfilter_log_sysctl_init(struct net *net) { int i; - - for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { - snprintf(nf_log_sysctl_fnames[i-NFPROTO_UNSPEC], 3, "%d", i); - nf_log_sysctl_table[i].procname = - nf_log_sysctl_fnames[i-NFPROTO_UNSPEC]; - nf_log_sysctl_table[i].data = NULL; - nf_log_sysctl_table[i].maxlen = - NFLOGGER_NAME_LEN * sizeof(char); - nf_log_sysctl_table[i].mode = 0644; - nf_log_sysctl_table[i].proc_handler = nf_log_proc_dostring; - nf_log_sysctl_table[i].extra1 = (void *)(unsigned long) i; + struct ctl_table *table; + + table = nf_log_sysctl_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(nf_log_sysctl_table, + sizeof(nf_log_sysctl_table), + GFP_KERNEL); + if (!table) + goto err_alloc; + } else { + for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { + snprintf(nf_log_sysctl_fnames[i], + 3, "%d", i); + nf_log_sysctl_table[i].procname = + nf_log_sysctl_fnames[i]; + nf_log_sysctl_table[i].data = NULL; + nf_log_sysctl_table[i].maxlen = + NFLOGGER_NAME_LEN * sizeof(char); + nf_log_sysctl_table[i].mode = 0644; + nf_log_sysctl_table[i].proc_handler = + nf_log_proc_dostring; + nf_log_sysctl_table[i].extra1 = + (void *)(unsigned long) i; + } } - nf_log_dir_header = register_net_sysctl(&init_net, "net/netfilter/nf_log", - nf_log_sysctl_table); - if (!nf_log_dir_header) - return -ENOMEM; + net->nf.nf_log_dir_header = register_net_sysctl(net, + "net/netfilter/nf_log", + table); + if (!net->nf.nf_log_dir_header) + goto err_reg; return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void netfilter_log_sysctl_exit(struct net *net) +{ + struct ctl_table *table; + + table = net->nf.nf_log_dir_header->ctl_table_arg; + unregister_net_sysctl_table(net->nf.nf_log_dir_header); + if (!net_eq(net, &init_net)) + kfree(table); } #else -static __init int netfilter_log_sysctl_init(void) +static int netfilter_log_sysctl_init(struct net *net) { return 0; } + +static void netfilter_log_sysctl_exit(struct net *net) +{ +} #endif /* CONFIG_SYSCTL */ -int __init netfilter_log_init(void) +static int __net_init nf_log_net_init(struct net *net) { - int i, r; + int ret = -ENOMEM; + #ifdef CONFIG_PROC_FS if (!proc_create("nf_log", S_IRUGO, - proc_net_netfilter, &nflog_file_ops)) - return -1; + net->nf.proc_netfilter, &nflog_file_ops)) + return ret; #endif + ret = netfilter_log_sysctl_init(net); + if (ret < 0) + goto out_sysctl; + + return 0; - /* Errors will trigger panic, unroll on error is unnecessary. */ - r = netfilter_log_sysctl_init(); - if (r < 0) - return r; +out_sysctl: + /* For init_net: errors will trigger panic, don't unroll on error. */ + if (!net_eq(net, &init_net)) + remove_proc_entry("nf_log", net->nf.proc_netfilter); + + return ret; +} + +static void __net_exit nf_log_net_exit(struct net *net) +{ + netfilter_log_sysctl_exit(net); + remove_proc_entry("nf_log", net->nf.proc_netfilter); +} + +static struct pernet_operations nf_log_net_ops = { + .init = nf_log_net_init, + .exit = nf_log_net_exit, +}; + +int __init netfilter_log_init(void) +{ + int i, ret; + + ret = register_pernet_subsys(&nf_log_net_ops); + if (ret < 0) + return ret; for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) INIT_LIST_HEAD(&(nf_loggers_l[i])); diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 589d686f0b4c..dc3fd5d44464 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -49,6 +49,8 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, return -EINVAL; acct_name = nla_data(tb[NFACCT_NAME]); + if (strlen(acct_name) == 0) + return -EINVAL; list_for_each_entry(nfacct, &nfnl_acct_list, head) { if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 4a2593f100cb..1a0be2af1dd8 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -32,6 +32,7 @@ #include <linux/slab.h> #include <net/sock.h> #include <net/netfilter/nf_log.h> +#include <net/netns/generic.h> #include <net/netfilter/nfnetlink_log.h> #include <linux/atomic.h> @@ -56,6 +57,7 @@ struct nfulnl_instance { unsigned int qlen; /* number of nlmsgs in skb */ struct sk_buff *skb; /* pre-allocatd skb */ struct timer_list timer; + struct net *net; struct user_namespace *peer_user_ns; /* User namespace of the peer process */ int peer_portid; /* PORTID of the peer process */ @@ -71,25 +73,34 @@ struct nfulnl_instance { struct rcu_head rcu; }; -static DEFINE_SPINLOCK(instances_lock); -static atomic_t global_seq; - #define INSTANCE_BUCKETS 16 -static struct hlist_head instance_table[INSTANCE_BUCKETS]; static unsigned int hash_init; +static int nfnl_log_net_id __read_mostly; + +struct nfnl_log_net { + spinlock_t instances_lock; + struct hlist_head instance_table[INSTANCE_BUCKETS]; + atomic_t global_seq; +}; + +static struct nfnl_log_net *nfnl_log_pernet(struct net *net) +{ + return net_generic(net, nfnl_log_net_id); +} + static inline u_int8_t instance_hashfn(u_int16_t group_num) { return ((group_num & 0xff) % INSTANCE_BUCKETS); } static struct nfulnl_instance * -__instance_lookup(u_int16_t group_num) +__instance_lookup(struct nfnl_log_net *log, u_int16_t group_num) { struct hlist_head *head; struct nfulnl_instance *inst; - head = &instance_table[instance_hashfn(group_num)]; + head = &log->instance_table[instance_hashfn(group_num)]; hlist_for_each_entry_rcu(inst, head, hlist) { if (inst->group_num == group_num) return inst; @@ -104,12 +115,12 @@ instance_get(struct nfulnl_instance *inst) } static struct nfulnl_instance * -instance_lookup_get(u_int16_t group_num) +instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num) { struct nfulnl_instance *inst; rcu_read_lock_bh(); - inst = __instance_lookup(group_num); + inst = __instance_lookup(log, group_num); if (inst && !atomic_inc_not_zero(&inst->use)) inst = NULL; rcu_read_unlock_bh(); @@ -119,7 +130,11 @@ instance_lookup_get(u_int16_t group_num) static void nfulnl_instance_free_rcu(struct rcu_head *head) { - kfree(container_of(head, struct nfulnl_instance, rcu)); + struct nfulnl_instance *inst = + container_of(head, struct nfulnl_instance, rcu); + + put_net(inst->net); + kfree(inst); module_put(THIS_MODULE); } @@ -133,13 +148,15 @@ instance_put(struct nfulnl_instance *inst) static void nfulnl_timer(unsigned long data); static struct nfulnl_instance * -instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns) +instance_create(struct net *net, u_int16_t group_num, + int portid, struct user_namespace *user_ns) { struct nfulnl_instance *inst; + struct nfnl_log_net *log = nfnl_log_pernet(net); int err; - spin_lock_bh(&instances_lock); - if (__instance_lookup(group_num)) { + spin_lock_bh(&log->instances_lock); + if (__instance_lookup(log, group_num)) { err = -EEXIST; goto out_unlock; } @@ -163,6 +180,7 @@ instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns) setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst); + inst->net = get_net(net); inst->peer_user_ns = user_ns; inst->peer_portid = portid; inst->group_num = group_num; @@ -174,14 +192,15 @@ instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns) inst->copy_range = NFULNL_COPY_RANGE_MAX; hlist_add_head_rcu(&inst->hlist, - &instance_table[instance_hashfn(group_num)]); + &log->instance_table[instance_hashfn(group_num)]); + - spin_unlock_bh(&instances_lock); + spin_unlock_bh(&log->instances_lock); return inst; out_unlock: - spin_unlock_bh(&instances_lock); + spin_unlock_bh(&log->instances_lock); return ERR_PTR(err); } @@ -210,11 +229,12 @@ __instance_destroy(struct nfulnl_instance *inst) } static inline void -instance_destroy(struct nfulnl_instance *inst) +instance_destroy(struct nfnl_log_net *log, + struct nfulnl_instance *inst) { - spin_lock_bh(&instances_lock); + spin_lock_bh(&log->instances_lock); __instance_destroy(inst); - spin_unlock_bh(&instances_lock); + spin_unlock_bh(&log->instances_lock); } static int @@ -336,7 +356,7 @@ __nfulnl_send(struct nfulnl_instance *inst) if (!nlh) goto out; } - status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_portid, + status = nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid, MSG_DONTWAIT); inst->qlen = 0; @@ -370,7 +390,8 @@ nfulnl_timer(unsigned long data) /* This is an inline function, we don't really care about a long * list of arguments */ static inline int -__build_packet_message(struct nfulnl_instance *inst, +__build_packet_message(struct nfnl_log_net *log, + struct nfulnl_instance *inst, const struct sk_buff *skb, unsigned int data_len, u_int8_t pf, @@ -536,7 +557,7 @@ __build_packet_message(struct nfulnl_instance *inst, /* global sequence number */ if ((inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) && nla_put_be32(inst->skb, NFULA_SEQ_GLOBAL, - htonl(atomic_inc_return(&global_seq)))) + htonl(atomic_inc_return(&log->global_seq)))) goto nla_put_failure; if (data_len) { @@ -592,13 +613,15 @@ nfulnl_log_packet(u_int8_t pf, const struct nf_loginfo *li; unsigned int qthreshold; unsigned int plen; + struct net *net = dev_net(in ? in : out); + struct nfnl_log_net *log = nfnl_log_pernet(net); if (li_user && li_user->type == NF_LOG_TYPE_ULOG) li = li_user; else li = &default_loginfo; - inst = instance_lookup_get(li->u.ulog.group); + inst = instance_lookup_get(log, li->u.ulog.group); if (!inst) return; @@ -680,7 +703,7 @@ nfulnl_log_packet(u_int8_t pf, inst->qlen++; - __build_packet_message(inst, skb, data_len, pf, + __build_packet_message(log, inst, skb, data_len, pf, hooknum, in, out, prefix, plen); if (inst->qlen >= qthreshold) @@ -709,24 +732,24 @@ nfulnl_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr) { struct netlink_notify *n = ptr; + struct nfnl_log_net *log = nfnl_log_pernet(n->net); if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { int i; /* destroy all instances for this portid */ - spin_lock_bh(&instances_lock); + spin_lock_bh(&log->instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *t2; struct nfulnl_instance *inst; - struct hlist_head *head = &instance_table[i]; + struct hlist_head *head = &log->instance_table[i]; hlist_for_each_entry_safe(inst, t2, head, hlist) { - if ((net_eq(n->net, &init_net)) && - (n->portid == inst->peer_portid)) + if (n->portid == inst->peer_portid) __instance_destroy(inst); } } - spin_unlock_bh(&instances_lock); + spin_unlock_bh(&log->instances_lock); } return NOTIFY_DONE; } @@ -767,6 +790,8 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, u_int16_t group_num = ntohs(nfmsg->res_id); struct nfulnl_instance *inst; struct nfulnl_msg_config_cmd *cmd = NULL; + struct net *net = sock_net(ctnl); + struct nfnl_log_net *log = nfnl_log_pernet(net); int ret = 0; if (nfula[NFULA_CFG_CMD]) { @@ -776,14 +801,14 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, /* Commands without queue context */ switch (cmd->command) { case NFULNL_CFG_CMD_PF_BIND: - return nf_log_bind_pf(pf, &nfulnl_logger); + return nf_log_bind_pf(net, pf, &nfulnl_logger); case NFULNL_CFG_CMD_PF_UNBIND: - nf_log_unbind_pf(pf); + nf_log_unbind_pf(net, pf); return 0; } } - inst = instance_lookup_get(group_num); + inst = instance_lookup_get(log, group_num); if (inst && inst->peer_portid != NETLINK_CB(skb).portid) { ret = -EPERM; goto out_put; @@ -797,7 +822,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, goto out_put; } - inst = instance_create(group_num, + inst = instance_create(net, group_num, NETLINK_CB(skb).portid, sk_user_ns(NETLINK_CB(skb).ssk)); if (IS_ERR(inst)) { @@ -811,7 +836,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, goto out; } - instance_destroy(inst); + instance_destroy(log, inst); goto out_put; default: ret = -ENOTSUPP; @@ -894,55 +919,68 @@ static const struct nfnetlink_subsystem nfulnl_subsys = { #ifdef CONFIG_PROC_FS struct iter_state { + struct seq_net_private p; unsigned int bucket; }; -static struct hlist_node *get_first(struct iter_state *st) +static struct hlist_node *get_first(struct net *net, struct iter_state *st) { + struct nfnl_log_net *log; if (!st) return NULL; + log = nfnl_log_pernet(net); + for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { - if (!hlist_empty(&instance_table[st->bucket])) - return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket])); + struct hlist_head *head = &log->instance_table[st->bucket]; + + if (!hlist_empty(head)) + return rcu_dereference_bh(hlist_first_rcu(head)); } return NULL; } -static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h) +static struct hlist_node *get_next(struct net *net, struct iter_state *st, + struct hlist_node *h) { h = rcu_dereference_bh(hlist_next_rcu(h)); while (!h) { + struct nfnl_log_net *log; + struct hlist_head *head; + if (++st->bucket >= INSTANCE_BUCKETS) return NULL; - h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket])); + log = nfnl_log_pernet(net); + head = &log->instance_table[st->bucket]; + h = rcu_dereference_bh(hlist_first_rcu(head)); } return h; } -static struct hlist_node *get_idx(struct iter_state *st, loff_t pos) +static struct hlist_node *get_idx(struct net *net, struct iter_state *st, + loff_t pos) { struct hlist_node *head; - head = get_first(st); + head = get_first(net, st); if (head) - while (pos && (head = get_next(st, head))) + while (pos && (head = get_next(net, st, head))) pos--; return pos ? NULL : head; } -static void *seq_start(struct seq_file *seq, loff_t *pos) +static void *seq_start(struct seq_file *s, loff_t *pos) __acquires(rcu_bh) { rcu_read_lock_bh(); - return get_idx(seq->private, *pos); + return get_idx(seq_file_net(s), s->private, *pos); } static void *seq_next(struct seq_file *s, void *v, loff_t *pos) { (*pos)++; - return get_next(s->private, v); + return get_next(seq_file_net(s), s->private, v); } static void seq_stop(struct seq_file *s, void *v) @@ -971,8 +1009,8 @@ static const struct seq_operations nful_seq_ops = { static int nful_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &nful_seq_ops, - sizeof(struct iter_state)); + return seq_open_net(inode, file, &nful_seq_ops, + sizeof(struct iter_state)); } static const struct file_operations nful_file_ops = { @@ -980,17 +1018,43 @@ static const struct file_operations nful_file_ops = { .open = nful_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; #endif /* PROC_FS */ -static int __init nfnetlink_log_init(void) +static int __net_init nfnl_log_net_init(struct net *net) { - int i, status = -ENOMEM; + unsigned int i; + struct nfnl_log_net *log = nfnl_log_pernet(net); for (i = 0; i < INSTANCE_BUCKETS; i++) - INIT_HLIST_HEAD(&instance_table[i]); + INIT_HLIST_HEAD(&log->instance_table[i]); + spin_lock_init(&log->instances_lock); + +#ifdef CONFIG_PROC_FS + if (!proc_create("nfnetlink_log", 0440, + net->nf.proc_netfilter, &nful_file_ops)) + return -ENOMEM; +#endif + return 0; +} + +static void __net_exit nfnl_log_net_exit(struct net *net) +{ + remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter); +} + +static struct pernet_operations nfnl_log_net_ops = { + .init = nfnl_log_net_init, + .exit = nfnl_log_net_exit, + .id = &nfnl_log_net_id, + .size = sizeof(struct nfnl_log_net), +}; + +static int __init nfnetlink_log_init(void) +{ + int status = -ENOMEM; /* it's not really all that important to have a random value, so * we can do this from the init function, even if there hasn't @@ -1000,29 +1064,25 @@ static int __init nfnetlink_log_init(void) netlink_register_notifier(&nfulnl_rtnl_notifier); status = nfnetlink_subsys_register(&nfulnl_subsys); if (status < 0) { - printk(KERN_ERR "log: failed to create netlink socket\n"); + pr_err("log: failed to create netlink socket\n"); goto cleanup_netlink_notifier; } status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger); if (status < 0) { - printk(KERN_ERR "log: failed to register logger\n"); + pr_err("log: failed to register logger\n"); goto cleanup_subsys; } -#ifdef CONFIG_PROC_FS - if (!proc_create("nfnetlink_log", 0440, - proc_net_netfilter, &nful_file_ops)) { - status = -ENOMEM; + status = register_pernet_subsys(&nfnl_log_net_ops); + if (status < 0) { + pr_err("log: failed to register pernet ops\n"); goto cleanup_logger; } -#endif return status; -#ifdef CONFIG_PROC_FS cleanup_logger: nf_log_unregister(&nfulnl_logger); -#endif cleanup_subsys: nfnetlink_subsys_unregister(&nfulnl_subsys); cleanup_netlink_notifier: @@ -1032,10 +1092,8 @@ cleanup_netlink_notifier: static void __exit nfnetlink_log_fini(void) { + unregister_pernet_subsys(&nfnl_log_net_ops); nf_log_unregister(&nfulnl_logger); -#ifdef CONFIG_PROC_FS - remove_proc_entry("nfnetlink_log", proc_net_netfilter); -#endif nfnetlink_subsys_unregister(&nfulnl_subsys); netlink_unregister_notifier(&nfulnl_rtnl_notifier); } diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index e92c9161e396..5e280b3e154f 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -30,6 +30,7 @@ #include <linux/list.h> #include <net/sock.h> #include <net/netfilter/nf_queue.h> +#include <net/netns/generic.h> #include <net/netfilter/nfnetlink_queue.h> #include <linux/atomic.h> @@ -66,10 +67,18 @@ struct nfqnl_instance { typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long); -static DEFINE_SPINLOCK(instances_lock); +static int nfnl_queue_net_id __read_mostly; #define INSTANCE_BUCKETS 16 -static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly; +struct nfnl_queue_net { + spinlock_t instances_lock; + struct hlist_head instance_table[INSTANCE_BUCKETS]; +}; + +static struct nfnl_queue_net *nfnl_queue_pernet(struct net *net) +{ + return net_generic(net, nfnl_queue_net_id); +} static inline u_int8_t instance_hashfn(u_int16_t queue_num) { @@ -77,12 +86,12 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num) } static struct nfqnl_instance * -instance_lookup(u_int16_t queue_num) +instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num) { struct hlist_head *head; struct nfqnl_instance *inst; - head = &instance_table[instance_hashfn(queue_num)]; + head = &q->instance_table[instance_hashfn(queue_num)]; hlist_for_each_entry_rcu(inst, head, hlist) { if (inst->queue_num == queue_num) return inst; @@ -91,14 +100,15 @@ instance_lookup(u_int16_t queue_num) } static struct nfqnl_instance * -instance_create(u_int16_t queue_num, int portid) +instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, + int portid) { struct nfqnl_instance *inst; unsigned int h; int err; - spin_lock(&instances_lock); - if (instance_lookup(queue_num)) { + spin_lock(&q->instances_lock); + if (instance_lookup(q, queue_num)) { err = -EEXIST; goto out_unlock; } @@ -123,16 +133,16 @@ instance_create(u_int16_t queue_num, int portid) } h = instance_hashfn(queue_num); - hlist_add_head_rcu(&inst->hlist, &instance_table[h]); + hlist_add_head_rcu(&inst->hlist, &q->instance_table[h]); - spin_unlock(&instances_lock); + spin_unlock(&q->instances_lock); return inst; out_free: kfree(inst); out_unlock: - spin_unlock(&instances_lock); + spin_unlock(&q->instances_lock); return ERR_PTR(err); } @@ -158,11 +168,11 @@ __instance_destroy(struct nfqnl_instance *inst) } static void -instance_destroy(struct nfqnl_instance *inst) +instance_destroy(struct nfnl_queue_net *q, struct nfqnl_instance *inst) { - spin_lock(&instances_lock); + spin_lock(&q->instances_lock); __instance_destroy(inst); - spin_unlock(&instances_lock); + spin_unlock(&q->instances_lock); } static inline void @@ -473,9 +483,12 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) int err = -ENOBUFS; __be32 *packet_id_ptr; int failopen = 0; + struct net *net = dev_net(entry->indev ? + entry->indev : entry->outdev); + struct nfnl_queue_net *q = nfnl_queue_pernet(net); /* rcu_read_lock()ed by nf_hook_slow() */ - queue = instance_lookup(queuenum); + queue = instance_lookup(q, queuenum); if (!queue) { err = -ESRCH; goto err_out; @@ -512,7 +525,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) *packet_id_ptr = htonl(entry->id); /* nfnetlink_unicast will either free the nskb or add it to a socket */ - err = nfnetlink_unicast(nskb, &init_net, queue->peer_portid, MSG_DONTWAIT); + err = nfnetlink_unicast(nskb, net, queue->peer_portid, MSG_DONTWAIT); if (err < 0) { queue->queue_user_dropped++; goto err_out_unlock; @@ -625,15 +638,16 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) /* drop all packets with either indev or outdev == ifindex from all queue * instances */ static void -nfqnl_dev_drop(int ifindex) +nfqnl_dev_drop(struct net *net, int ifindex) { int i; + struct nfnl_queue_net *q = nfnl_queue_pernet(net); rcu_read_lock(); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct nfqnl_instance *inst; - struct hlist_head *head = &instance_table[i]; + struct hlist_head *head = &q->instance_table[i]; hlist_for_each_entry_rcu(inst, head, hlist) nfqnl_flush(inst, dev_cmp, ifindex); @@ -650,12 +664,9 @@ nfqnl_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) - nfqnl_dev_drop(dev->ifindex); + nfqnl_dev_drop(dev_net(dev), dev->ifindex); return NOTIFY_DONE; } @@ -668,24 +679,24 @@ nfqnl_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr) { struct netlink_notify *n = ptr; + struct nfnl_queue_net *q = nfnl_queue_pernet(n->net); if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { int i; /* destroy all instances for this portid */ - spin_lock(&instances_lock); + spin_lock(&q->instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *t2; struct nfqnl_instance *inst; - struct hlist_head *head = &instance_table[i]; + struct hlist_head *head = &q->instance_table[i]; hlist_for_each_entry_safe(inst, t2, head, hlist) { - if ((n->net == &init_net) && - (n->portid == inst->peer_portid)) + if (n->portid == inst->peer_portid) __instance_destroy(inst); } } - spin_unlock(&instances_lock); + spin_unlock(&q->instances_lock); } return NOTIFY_DONE; } @@ -706,11 +717,12 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { [NFQA_MARK] = { .type = NLA_U32 }, }; -static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlportid) +static struct nfqnl_instance * +verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, int nlportid) { struct nfqnl_instance *queue; - queue = instance_lookup(queue_num); + queue = instance_lookup(q, queue_num); if (!queue) return ERR_PTR(-ENODEV); @@ -754,7 +766,11 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, LIST_HEAD(batch_list); u16 queue_num = ntohs(nfmsg->res_id); - queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); + struct net *net = sock_net(ctnl); + struct nfnl_queue_net *q = nfnl_queue_pernet(net); + + queue = verdict_instance_lookup(q, queue_num, + NETLINK_CB(skb).portid); if (IS_ERR(queue)) return PTR_ERR(queue); @@ -802,10 +818,13 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, enum ip_conntrack_info uninitialized_var(ctinfo); struct nf_conn *ct = NULL; - queue = instance_lookup(queue_num); - if (!queue) + struct net *net = sock_net(ctnl); + struct nfnl_queue_net *q = nfnl_queue_pernet(net); - queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); + queue = instance_lookup(q, queue_num); + if (!queue) + queue = verdict_instance_lookup(q, queue_num, + NETLINK_CB(skb).portid); if (IS_ERR(queue)) return PTR_ERR(queue); @@ -869,6 +888,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, u_int16_t queue_num = ntohs(nfmsg->res_id); struct nfqnl_instance *queue; struct nfqnl_msg_config_cmd *cmd = NULL; + struct net *net = sock_net(ctnl); + struct nfnl_queue_net *q = nfnl_queue_pernet(net); int ret = 0; if (nfqa[NFQA_CFG_CMD]) { @@ -882,7 +903,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } rcu_read_lock(); - queue = instance_lookup(queue_num); + queue = instance_lookup(q, queue_num); if (queue && queue->peer_portid != NETLINK_CB(skb).portid) { ret = -EPERM; goto err_out_unlock; @@ -895,7 +916,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ret = -EBUSY; goto err_out_unlock; } - queue = instance_create(queue_num, NETLINK_CB(skb).portid); + queue = instance_create(q, queue_num, + NETLINK_CB(skb).portid); if (IS_ERR(queue)) { ret = PTR_ERR(queue); goto err_out_unlock; @@ -906,7 +928,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ret = -ENODEV; goto err_out_unlock; } - instance_destroy(queue); + instance_destroy(q, queue); break; case NFQNL_CFG_CMD_PF_BIND: case NFQNL_CFG_CMD_PF_UNBIND: @@ -1000,19 +1022,24 @@ static const struct nfnetlink_subsystem nfqnl_subsys = { #ifdef CONFIG_PROC_FS struct iter_state { + struct seq_net_private p; unsigned int bucket; }; static struct hlist_node *get_first(struct seq_file *seq) { struct iter_state *st = seq->private; + struct net *net; + struct nfnl_queue_net *q; if (!st) return NULL; + net = seq_file_net(seq); + q = nfnl_queue_pernet(net); for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { - if (!hlist_empty(&instance_table[st->bucket])) - return instance_table[st->bucket].first; + if (!hlist_empty(&q->instance_table[st->bucket])) + return q->instance_table[st->bucket].first; } return NULL; } @@ -1020,13 +1047,17 @@ static struct hlist_node *get_first(struct seq_file *seq) static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h) { struct iter_state *st = seq->private; + struct net *net = seq_file_net(seq); h = h->next; while (!h) { + struct nfnl_queue_net *q; + if (++st->bucket >= INSTANCE_BUCKETS) return NULL; - h = instance_table[st->bucket].first; + q = nfnl_queue_pernet(net); + h = q->instance_table[st->bucket].first; } return h; } @@ -1042,11 +1073,11 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos) return pos ? NULL : head; } -static void *seq_start(struct seq_file *seq, loff_t *pos) - __acquires(instances_lock) +static void *seq_start(struct seq_file *s, loff_t *pos) + __acquires(nfnl_queue_pernet(seq_file_net(s))->instances_lock) { - spin_lock(&instances_lock); - return get_idx(seq, *pos); + spin_lock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock); + return get_idx(s, *pos); } static void *seq_next(struct seq_file *s, void *v, loff_t *pos) @@ -1056,9 +1087,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos) } static void seq_stop(struct seq_file *s, void *v) - __releases(instances_lock) + __releases(nfnl_queue_pernet(seq_file_net(s))->instances_lock) { - spin_unlock(&instances_lock); + spin_unlock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock); } static int seq_show(struct seq_file *s, void *v) @@ -1082,7 +1113,7 @@ static const struct seq_operations nfqnl_seq_ops = { static int nfqnl_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &nfqnl_seq_ops, + return seq_open_net(inode, file, &nfqnl_seq_ops, sizeof(struct iter_state)); } @@ -1091,39 +1122,63 @@ static const struct file_operations nfqnl_file_ops = { .open = nfqnl_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; #endif /* PROC_FS */ -static int __init nfnetlink_queue_init(void) +static int __net_init nfnl_queue_net_init(struct net *net) { - int i, status = -ENOMEM; + unsigned int i; + struct nfnl_queue_net *q = nfnl_queue_pernet(net); for (i = 0; i < INSTANCE_BUCKETS; i++) - INIT_HLIST_HEAD(&instance_table[i]); + INIT_HLIST_HEAD(&q->instance_table[i]); + + spin_lock_init(&q->instances_lock); + +#ifdef CONFIG_PROC_FS + if (!proc_create("nfnetlink_queue", 0440, + net->nf.proc_netfilter, &nfqnl_file_ops)) + return -ENOMEM; +#endif + return 0; +} + +static void __net_exit nfnl_queue_net_exit(struct net *net) +{ + remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter); +} + +static struct pernet_operations nfnl_queue_net_ops = { + .init = nfnl_queue_net_init, + .exit = nfnl_queue_net_exit, + .id = &nfnl_queue_net_id, + .size = sizeof(struct nfnl_queue_net), +}; + +static int __init nfnetlink_queue_init(void) +{ + int status = -ENOMEM; netlink_register_notifier(&nfqnl_rtnl_notifier); status = nfnetlink_subsys_register(&nfqnl_subsys); if (status < 0) { - printk(KERN_ERR "nf_queue: failed to create netlink socket\n"); + pr_err("nf_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; } -#ifdef CONFIG_PROC_FS - if (!proc_create("nfnetlink_queue", 0440, - proc_net_netfilter, &nfqnl_file_ops)) + status = register_pernet_subsys(&nfnl_queue_net_ops); + if (status < 0) { + pr_err("nf_queue: failed to register pernet ops\n"); goto cleanup_subsys; -#endif - + } register_netdevice_notifier(&nfqnl_dev_notifier); nf_register_queue_handler(&nfqh); return status; -#ifdef CONFIG_PROC_FS cleanup_subsys: nfnetlink_subsys_unregister(&nfqnl_subsys); -#endif cleanup_netlink_notifier: netlink_unregister_notifier(&nfqnl_rtnl_notifier); return status; @@ -1133,9 +1188,7 @@ static void __exit nfnetlink_queue_fini(void) { nf_unregister_queue_handler(); unregister_netdevice_notifier(&nfqnl_dev_notifier); -#ifdef CONFIG_PROC_FS - remove_proc_entry("nfnetlink_queue", proc_net_netfilter); -#endif + unregister_pernet_subsys(&nfnl_queue_net_ops); nfnetlink_subsys_unregister(&nfqnl_subsys); netlink_unregister_notifier(&nfqnl_rtnl_notifier); diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index fa40096940a1..fe573f6c9e91 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -474,7 +474,14 @@ ipt_log_packet(u_int8_t pf, const struct nf_loginfo *loginfo, const char *prefix) { - struct sbuff *m = sb_open(); + struct sbuff *m; + struct net *net = dev_net(in ? in : out); + + /* FIXME: Disabled from containers until syslog ns is supported */ + if (!net_eq(net, &init_net)) + return; + + m = sb_open(); if (!loginfo) loginfo = &default_loginfo; @@ -798,7 +805,14 @@ ip6t_log_packet(u_int8_t pf, const struct nf_loginfo *loginfo, const char *prefix) { - struct sbuff *m = sb_open(); + struct sbuff *m; + struct net *net = dev_net(in ? in : out); + + /* FIXME: Disabled from containers until syslog ns is supported */ + if (!net_eq(net, &init_net)) + return; + + m = sb_open(); if (!loginfo) loginfo = &default_loginfo; @@ -893,23 +907,55 @@ static struct nf_logger ip6t_log_logger __read_mostly = { }; #endif +static int __net_init log_net_init(struct net *net) +{ + nf_log_set(net, NFPROTO_IPV4, &ipt_log_logger); +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) + nf_log_set(net, NFPROTO_IPV6, &ip6t_log_logger); +#endif + return 0; +} + +static void __net_exit log_net_exit(struct net *net) +{ + nf_log_unset(net, &ipt_log_logger); +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) + nf_log_unset(net, &ip6t_log_logger); +#endif +} + +static struct pernet_operations log_net_ops = { + .init = log_net_init, + .exit = log_net_exit, +}; + static int __init log_tg_init(void) { int ret; + ret = register_pernet_subsys(&log_net_ops); + if (ret < 0) + goto err_pernet; + ret = xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs)); if (ret < 0) - return ret; + goto err_target; nf_log_register(NFPROTO_IPV4, &ipt_log_logger); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) nf_log_register(NFPROTO_IPV6, &ip6t_log_logger); #endif return 0; + +err_target: + unregister_pernet_subsys(&log_net_ops); +err_pernet: + return ret; } static void __exit log_tg_exit(void) { + unregister_pernet_subsys(&log_net_ops); nf_log_unregister(&ipt_log_logger); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) nf_log_unregister(&ip6t_log_logger); diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 817f9e9f2b16..1e2fae32f81b 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -76,22 +76,31 @@ static u32 hash_v6(const struct sk_buff *skb) } #endif -static unsigned int -nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) +static u32 +nfqueue_hash(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info_v1 *info = par->targinfo; u32 queue = info->queuenum; - if (info->queues_total > 1) { - if (par->family == NFPROTO_IPV4) - queue = (((u64) hash_v4(skb) * info->queues_total) >> - 32) + queue; + if (par->family == NFPROTO_IPV4) + queue += ((u64) hash_v4(skb) * info->queues_total) >> 32; #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - else if (par->family == NFPROTO_IPV6) - queue = (((u64) hash_v6(skb) * info->queues_total) >> - 32) + queue; + else if (par->family == NFPROTO_IPV6) + queue += ((u64) hash_v6(skb) * info->queues_total) >> 32; #endif - } + + return queue; +} + +static unsigned int +nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_NFQ_info_v1 *info = par->targinfo; + u32 queue = info->queuenum; + + if (info->queues_total > 1) + queue = nfqueue_hash(skb, par); + return NF_QUEUE_NR(queue); } @@ -108,7 +117,7 @@ nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par) static int nfqueue_tg_check(const struct xt_tgchk_param *par) { - const struct xt_NFQ_info_v2 *info = par->targinfo; + const struct xt_NFQ_info_v3 *info = par->targinfo; u32 maxid; if (unlikely(!rnd_inited)) { @@ -125,11 +134,32 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par) info->queues_total, maxid); return -ERANGE; } - if (par->target->revision == 2 && info->bypass > 1) + if (par->target->revision == 2 && info->flags > 1) return -EINVAL; + if (par->target->revision == 3 && info->flags & ~NFQ_FLAG_MASK) + return -EINVAL; + return 0; } +static unsigned int +nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_NFQ_info_v3 *info = par->targinfo; + u32 queue = info->queuenum; + + if (info->queues_total > 1) { + if (info->flags & NFQ_FLAG_CPU_FANOUT) { + int cpu = smp_processor_id(); + + queue = info->queuenum + cpu % info->queues_total; + } else + queue = nfqueue_hash(skb, par); + } + + return NF_QUEUE_NR(queue); +} + static struct xt_target nfqueue_tg_reg[] __read_mostly = { { .name = "NFQUEUE", @@ -156,6 +186,15 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = { .targetsize = sizeof(struct xt_NFQ_info_v2), .me = THIS_MODULE, }, + { + .name = "NFQUEUE", + .revision = 3, + .family = NFPROTO_UNSPEC, + .checkentry = nfqueue_tg_check, + .target = nfqueue_tg_v3, + .targetsize = sizeof(struct xt_NFQ_info_v3), + .me = THIS_MODULE, + }, }; static int __init nfqueue_tg_init(void) diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index a5e673d32bda..647d989a01e6 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -201,6 +201,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) unsigned char opts[MAX_IPOPTLEN]; const struct xt_osf_finger *kf; const struct xt_osf_user_finger *f; + struct net *net = dev_net(p->in ? p->in : p->out); if (!info) return false; @@ -325,7 +326,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) fcount++; if (info->flags & XT_OSF_LOG) - nf_log_packet(p->family, p->hooknum, skb, + nf_log_packet(net, p->family, p->hooknum, skb, p->in, p->out, NULL, "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", f->genre, f->version, f->subtype, @@ -341,7 +342,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) rcu_read_unlock(); if (!fcount && (info->flags & XT_OSF_LOG)) - nf_log_packet(p->family, p->hooknum, skb, p->in, p->out, NULL, + nf_log_packet(net, p->family, p->hooknum, skb, p->in, + p->out, NULL, "Remote OS is not known: %pI4:%u -> %pI4:%u\n", &ip->saddr, ntohs(tcp->source), &ip->daddr, ntohs(tcp->dest)); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index d1fa1d9ffd2e..7fcb307dea47 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1173,6 +1173,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, } if (sax != NULL) { + memset(sax, 0, sizeof(sax)); sax->sax25_family = AF_NETROM; skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call, AX25_ADDR_LEN); diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index bb67b98b9797..7de0368aff0c 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -107,8 +107,6 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen, accept_sk->sk_state_change(sk); bh_unlock_sock(accept_sk); - - sock_orphan(accept_sk); } if (listen == true) { @@ -134,8 +132,6 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen, bh_unlock_sock(sk); - sock_orphan(sk); - sk_del_node_init(sk); } @@ -164,8 +160,6 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen, bh_unlock_sock(sk); - sock_orphan(sk); - sk_del_node_init(sk); } @@ -869,7 +863,6 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local, skb_get(skb); } else { pr_err("Receive queue is full\n"); - kfree_skb(skb); } nfc_llcp_sock_put(llcp_sock); @@ -1072,7 +1065,6 @@ static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local, skb_get(skb); } else { pr_err("Receive queue is full\n"); - kfree_skb(skb); } } diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index f1b377e247fe..c1101e6de170 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -388,7 +388,9 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent, } if (sk->sk_state == LLCP_CONNECTED || !newsock) { - nfc_llcp_accept_unlink(sk); + list_del_init(&lsk->accept_queue); + sock_put(sk); + if (newsock) sock_graft(sk, newsock); @@ -521,7 +523,8 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, return llcp_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; @@ -582,8 +585,6 @@ static int llcp_sock_release(struct socket *sock) nfc_llcp_accept_unlink(accept_sk); release_sock(accept_sk); - - sock_orphan(accept_sk); } } @@ -764,6 +765,8 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, pr_debug("%p %zu\n", sk, len); + msg->msg_namelen = 0; + lock_sock(sk); if (sk->sk_state == LLCP_CLOSED && @@ -809,6 +812,7 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, pr_debug("Datagram socket %d %d\n", ui_cb->dsap, ui_cb->ssap); + memset(sockaddr, 0, sizeof(*sockaddr)); sockaddr->sa_family = AF_NFC; sockaddr->nfc_protocol = NFC_PROTO_NFC_DEP; sockaddr->dsap = ui_cb->dsap; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8e4644ff8d34..e566b793f07c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -158,10 +158,16 @@ struct packet_mreq_max { unsigned char mr_address[MAX_ADDR_LEN]; }; +union tpacket_uhdr { + struct tpacket_hdr *h1; + struct tpacket2_hdr *h2; + struct tpacket3_hdr *h3; + void *raw; +}; + static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, int closing, int tx_ring); - #define V3_ALIGNMENT (8) #define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT)) @@ -290,11 +296,7 @@ static inline __pure struct page *pgv_to_page(void *addr) static void __packet_set_status(struct packet_sock *po, void *frame, int status) { - union { - struct tpacket_hdr *h1; - struct tpacket2_hdr *h2; - void *raw; - } h; + union tpacket_uhdr h; h.raw = frame; switch (po->tp_version) { @@ -317,11 +319,7 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status) static int __packet_get_status(struct packet_sock *po, void *frame) { - union { - struct tpacket_hdr *h1; - struct tpacket2_hdr *h2; - void *raw; - } h; + union tpacket_uhdr h; smp_rmb(); @@ -347,11 +345,7 @@ static void *packet_lookup_frame(struct packet_sock *po, int status) { unsigned int pg_vec_pos, frame_offset; - union { - struct tpacket_hdr *h1; - struct tpacket2_hdr *h2; - void *raw; - } h; + union tpacket_uhdr h; pg_vec_pos = position / rb->frames_per_block; frame_offset = position % rb->frames_per_block; @@ -1505,9 +1499,8 @@ retry: skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (err < 0) - goto out_unlock; + + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (unlikely(extra_len == 4)) skb->no_fcs = 1; @@ -1670,12 +1663,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct sock *sk; struct packet_sock *po; struct sockaddr_ll *sll; - union { - struct tpacket_hdr *h1; - struct tpacket2_hdr *h2; - struct tpacket3_hdr *h3; - void *raw; - } h; + union tpacket_uhdr h; u8 *skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; @@ -1910,11 +1898,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, void *frame, struct net_device *dev, int size_max, __be16 proto, unsigned char *addr, int hlen) { - union { - struct tpacket_hdr *h1; - struct tpacket2_hdr *h2; - void *raw; - } ph; + union tpacket_uhdr ph; int to_write, offset, len, tp_len, nr_frags, len_max; struct socket *sock = po->sk.sk_socket; struct page *page; @@ -2312,9 +2296,8 @@ static int packet_snd(struct socket *sock, err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); if (err) goto out_free; - err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (err < 0) - goto out_free; + + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (!gso_type && (len > dev->mtu + reserve + extra_len)) { /* Earlier code assumed this would be a VLAN pkt, diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index cf68e6e4054a..9c8347451597 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1253,6 +1253,7 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); if (srose != NULL) { + memset(srose, 0, msg->msg_namelen); srose->srose_family = AF_ROSE; srose->srose_addr = rose->dest_addr; srose->srose_call = rose->dest_call; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 08fa1e8a4ca4..3a4c0caa1f7d 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -166,15 +166,17 @@ static int tcf_csum_ipv4_igmp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h, +static int tcf_csum_ipv6_icmp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct icmp6hdr *icmp6h; + const struct ipv6hdr *ip6h; icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h)); if (icmp6h == NULL) return 0; + ip6h = ipv6_hdr(skb); icmp6h->icmp6_cksum = 0; skb->csum = csum_partial(icmp6h, ipl - ihl, 0); icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, @@ -186,15 +188,17 @@ static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h, return 1; } -static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph, +static int tcf_csum_ipv4_tcp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct tcphdr *tcph; + const struct iphdr *iph; tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); if (tcph == NULL) return 0; + iph = ip_hdr(skb); tcph->check = 0; skb->csum = csum_partial(tcph, ipl - ihl, 0); tcph->check = tcp_v4_check(ipl - ihl, @@ -205,15 +209,17 @@ static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph, return 1; } -static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h, +static int tcf_csum_ipv6_tcp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct tcphdr *tcph; + const struct ipv6hdr *ip6h; tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); if (tcph == NULL) return 0; + ip6h = ipv6_hdr(skb); tcph->check = 0; skb->csum = csum_partial(tcph, ipl - ihl, 0); tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, @@ -225,10 +231,11 @@ static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h, return 1; } -static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph, +static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl, int udplite) { struct udphdr *udph; + const struct iphdr *iph; u16 ul; /* @@ -242,6 +249,7 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph, if (udph == NULL) return 0; + iph = ip_hdr(skb); ul = ntohs(udph->len); if (udplite || udph->check) { @@ -276,10 +284,11 @@ ignore_obscure_skb: return 1; } -static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h, +static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl, int udplite) { struct udphdr *udph; + const struct ipv6hdr *ip6h; u16 ul; /* @@ -293,6 +302,7 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h, if (udph == NULL) return 0; + ip6h = ipv6_hdr(skb); ul = ntohs(udph->len); udph->check = 0; @@ -328,7 +338,7 @@ ignore_obscure_skb: static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) { - struct iphdr *iph; + const struct iphdr *iph; int ntkoff; ntkoff = skb_network_offset(skb); @@ -353,19 +363,19 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) break; case IPPROTO_TCP: if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) - if (!tcf_csum_ipv4_tcp(skb, iph, iph->ihl * 4, + if (!tcf_csum_ipv4_tcp(skb, iph->ihl * 4, ntohs(iph->tot_len))) goto fail; break; case IPPROTO_UDP: if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) - if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4, + if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4, ntohs(iph->tot_len), 0)) goto fail; break; case IPPROTO_UDPLITE: if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) - if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4, + if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4, ntohs(iph->tot_len), 1)) goto fail; break; @@ -377,7 +387,7 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) goto fail; - ip_send_check(iph); + ip_send_check(ip_hdr(skb)); } return 1; @@ -456,6 +466,7 @@ static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags) ixhl = ipv6_optlen(ip6xh); if (!pskb_may_pull(skb, hl + ixhl + ntkoff)) goto fail; + ip6xh = (void *)(skb_network_header(skb) + hl); if ((nexthdr == NEXTHDR_HOP) && !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl))) goto fail; @@ -464,25 +475,25 @@ static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags) break; case IPPROTO_ICMPV6: if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP) - if (!tcf_csum_ipv6_icmp(skb, ip6h, + if (!tcf_csum_ipv6_icmp(skb, hl, pl + sizeof(*ip6h))) goto fail; goto done; case IPPROTO_TCP: if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) - if (!tcf_csum_ipv6_tcp(skb, ip6h, + if (!tcf_csum_ipv6_tcp(skb, hl, pl + sizeof(*ip6h))) goto fail; goto done; case IPPROTO_UDP: if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) - if (!tcf_csum_ipv6_udp(skb, ip6h, hl, + if (!tcf_csum_ipv6_udp(skb, hl, pl + sizeof(*ip6h), 0)) goto fail; goto done; case IPPROTO_UDPLITE: if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) - if (!tcf_csum_ipv6_udp(skb, ip6h, hl, + if (!tcf_csum_ipv6_udp(skb, hl, pl + sizeof(*ip6h), 1)) goto fail; goto done; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 5c81b2603239..8e118af90973 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -22,7 +22,6 @@ #include <linux/skbuff.h> #include <linux/init.h> #include <linux/kmod.h> -#include <net/netlink.h> #include <linux/err.h> #include <linux/slab.h> #include <net/net_namespace.h> diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 13aa47aa2ffb..1bc210ffcba2 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -962,8 +962,11 @@ cbq_dequeue(struct Qdisc *sch) cbq_update(q); if ((incr -= incr2) < 0) incr = 0; + q->now += incr; + } else { + if (now > q->now) + q->now = now; } - q->now += incr; q->now_rt = now; for (;;) { diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 4e606fcb2534..55786283a3df 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -195,7 +195,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) flow->deficit = q->quantum; flow->dropped = 0; } - if (++sch->q.qlen < sch->limit) + if (++sch->q.qlen <= sch->limit) return NET_XMIT_SUCCESS; q->drop_overlimit++; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ffad48109a22..eac7e0ee23c1 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -904,7 +904,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, u32 rate) u64 mult; int shift; - r->rate_bps = rate << 3; + r->rate_bps = (u64)rate << 3; r->shift = 0; r->mult = 1; /* diff --git a/net/sctp/associola.c b/net/sctp/associola.c index d2709e2b7be6..423549a714e5 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -104,8 +104,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Initialize the object handling fields. */ atomic_set(&asoc->base.refcnt, 1); - asoc->base.dead = 0; - asoc->base.malloced = 0; + asoc->base.dead = false; /* Initialize the bind addr area. */ sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port); @@ -371,7 +370,6 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep, if (!sctp_association_init(asoc, ep, sk, scope, gfp)) goto fail_init; - asoc->base.malloced = 1; SCTP_DBG_OBJCNT_INC(assoc); SCTP_DEBUG_PRINTK("Created asoc %p\n", asoc); @@ -409,7 +407,7 @@ void sctp_association_free(struct sctp_association *asoc) /* Mark as dead, so other users can know this structure is * going away. */ - asoc->base.dead = 1; + asoc->base.dead = true; /* Dispose of any data lying around in the outqueue. */ sctp_outq_free(&asoc->outqueue); @@ -484,10 +482,8 @@ static void sctp_association_destroy(struct sctp_association *asoc) WARN_ON(atomic_read(&asoc->rmem_alloc)); - if (asoc->base.malloced) { - kfree(asoc); - SCTP_DBG_OBJCNT_DEC(assoc); - } + kfree(asoc); + SCTP_DBG_OBJCNT_DEC(assoc); } /* Change the primary destination address for the peer. */ diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 12ed45dbe75d..5fbd7bc6bb11 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -121,8 +121,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Initialize the basic object fields. */ atomic_set(&ep->base.refcnt, 1); - ep->base.dead = 0; - ep->base.malloced = 1; + ep->base.dead = false; /* Create an input queue. */ sctp_inq_init(&ep->base.inqueue); @@ -198,7 +197,7 @@ struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, gfp_t gfp) goto fail; if (!sctp_endpoint_init(ep, sk, gfp)) goto fail_init; - ep->base.malloced = 1; + SCTP_DBG_OBJCNT_INC(ep); return ep; @@ -234,7 +233,7 @@ void sctp_endpoint_add_asoc(struct sctp_endpoint *ep, */ void sctp_endpoint_free(struct sctp_endpoint *ep) { - ep->base.dead = 1; + ep->base.dead = true; ep->base.sk->sk_state = SCTP_SS_CLOSED; @@ -279,11 +278,8 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) if (ep->base.sk) sock_put(ep->base.sk); - /* Finally, free up our memory. */ - if (ep->base.malloced) { - kfree(ep); - SCTP_DBG_OBJCNT_DEC(ep); - } + kfree(ep); + SCTP_DBG_OBJCNT_DEC(ep); } /* Hold a reference to an endpoint. */ diff --git a/net/sctp/proc.c b/net/sctp/proc.c index ab3bba8cb0a8..4e45ee35d0db 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -295,7 +295,8 @@ static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos) seq_printf(seq, " ASSOC SOCK STY SST ST HBKT " "ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT " "RPORT LADDRS <-> RADDRS " - "HBINT INS OUTS MAXRT T1X T2X RTXC\n"); + "HBINT INS OUTS MAXRT T1X T2X RTXC " + "wmema wmemq sndbuf rcvbuf\n"); return (void *)pos; } @@ -349,11 +350,16 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) sctp_seq_dump_local_addrs(seq, epb); seq_printf(seq, "<-> "); sctp_seq_dump_remote_addrs(seq, assoc); - seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d ", + seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d " + "%8d %8d %8d %8d", assoc->hbinterval, assoc->c.sinit_max_instreams, assoc->c.sinit_num_ostreams, assoc->max_retrans, assoc->init_retries, assoc->shutdown_retries, - assoc->rtx_data_chunks); + assoc->rtx_data_chunks, + atomic_read(&sk->sk_wmem_alloc), + sk->sk_wmem_queued, + sk->sk_sndbuf, + sk->sk_rcvbuf); seq_printf(seq, "\n"); } read_unlock(&head->lock); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b9070736b8d9..f631c5ff4dbf 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1119,9 +1119,10 @@ static int __sctp_connect(struct sock* sk, /* Make sure the destination port is correctly set * in all addresses. */ - if (asoc && asoc->peer.port && asoc->peer.port != port) + if (asoc && asoc->peer.port && asoc->peer.port != port) { + err = -EINVAL; goto out_free; - + } /* Check if there already is a matching association on the * endpoint (other than the one created here). @@ -6185,7 +6186,8 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) /* Is there any exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0; if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) diff --git a/net/socket.c b/net/socket.c index 88f759adf3af..36883fea44f3 100644 --- a/net/socket.c +++ b/net/socket.c @@ -600,7 +600,7 @@ void sock_release(struct socket *sock) } EXPORT_SYMBOL(sock_release); -int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) +void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) { *tx_flags = 0; if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) @@ -609,7 +609,6 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) *tx_flags |= SKBTX_SW_TSTAMP; if (sock_flag(sk, SOCK_WIFI_STATUS)) *tx_flags |= SKBTX_WIFI_STATUS; - return 0; } EXPORT_SYMBOL(sock_tx_timestamp); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index a9622b6cd916..515ce38e4f4c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -790,6 +790,7 @@ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg) if (addr) { addr->family = AF_TIPC; addr->addrtype = TIPC_ADDR_ID; + memset(&addr->addr, 0, sizeof(addr->addr)); addr->addr.id.ref = msg_origport(msg); addr->addr.id.node = msg_orignode(msg); addr->addr.name.domain = 0; /* could leave uninitialized */ @@ -904,6 +905,9 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, goto exit; } + /* will be updated in set_orig_addr() if needed */ + m->msg_namelen = 0; + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: @@ -1013,6 +1017,9 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, goto exit; } + /* will be updated in set_orig_addr() if needed */ + m->msg_namelen = 0; + target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 971282b6f6a3..5ca1631de7ef 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1340,7 +1340,6 @@ static void unix_destruct_scm(struct sk_buff *skb) struct scm_cookie scm; memset(&scm, 0, sizeof(scm)); scm.pid = UNIXCB(skb).pid; - scm.cred = UNIXCB(skb).cred; if (UNIXCB(skb).fp) unix_detach_fds(&scm, skb); @@ -1391,8 +1390,8 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen int err = 0; UNIXCB(skb).pid = get_pid(scm->pid); - if (scm->cred) - UNIXCB(skb).cred = get_cred(scm->cred); + UNIXCB(skb).uid = scm->creds.uid; + UNIXCB(skb).gid = scm->creds.gid; UNIXCB(skb).fp = NULL; if (scm->fp && send_fds) err = unix_attach_fds(scm, skb); @@ -1409,13 +1408,13 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, const struct sock *other) { - if (UNIXCB(skb).cred) + if (UNIXCB(skb).pid) return; if (test_bit(SOCK_PASSCRED, &sock->flags) || - (other->sk_socket && - test_bit(SOCK_PASSCRED, &other->sk_socket->flags))) { + !other->sk_socket || + test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { UNIXCB(skb).pid = get_pid(task_tgid(current)); - UNIXCB(skb).cred = get_current_cred(); + current_euid_egid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } } @@ -1819,7 +1818,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, siocb->scm = &tmp_scm; memset(&tmp_scm, 0, sizeof(tmp_scm)); } - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); + scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); unix_set_secdata(siocb->scm, skb); if (!(flags & MSG_PEEK)) { @@ -1991,11 +1990,12 @@ again: if (check_creds) { /* Never glue messages from different writers */ if ((UNIXCB(skb).pid != siocb->scm->pid) || - (UNIXCB(skb).cred != siocb->scm->cred)) + !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) || + !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid)) break; - } else { + } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); + scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); check_creds = 1; } @@ -2196,7 +2196,9 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); + if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index ca511c4f388a..7f93e2a42d7a 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -207,7 +207,7 @@ static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) struct vsock_sock *vsk; list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) - if (vsock_addr_equals_addr_any(addr, &vsk->local_addr)) + if (addr->svm_port == vsk->local_addr.svm_port) return sk_vsock(vsk); return NULL; @@ -220,8 +220,8 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, list_for_each_entry(vsk, vsock_connected_sockets(src, dst), connected_table) { - if (vsock_addr_equals_addr(src, &vsk->remote_addr) - && vsock_addr_equals_addr(dst, &vsk->local_addr)) { + if (vsock_addr_equals_addr(src, &vsk->remote_addr) && + dst->svm_port == vsk->local_addr.svm_port) { return sk_vsock(vsk); } } @@ -1670,6 +1670,8 @@ vsock_stream_recvmsg(struct kiocb *kiocb, vsk = vsock_sk(sk); err = 0; + msg->msg_namelen = 0; + lock_sock(sk); if (sk->sk_state != SS_CONNECTED) { diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index faf81b8c1a3a..daff75200e25 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -472,19 +472,16 @@ static struct sock *vmci_transport_get_pending( struct vsock_sock *vlistener; struct vsock_sock *vpending; struct sock *pending; + struct sockaddr_vm src; + + vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); vlistener = vsock_sk(listener); list_for_each_entry(vpending, &vlistener->pending_links, pending_links) { - struct sockaddr_vm src; - struct sockaddr_vm dst; - - vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); - vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port); - if (vsock_addr_equals_addr(&src, &vpending->remote_addr) && - vsock_addr_equals_addr(&dst, &vpending->local_addr)) { + pkt->dst_port == vpending->local_addr.svm_port) { pending = sk_vsock(vpending); sock_hold(pending); goto found; @@ -749,10 +746,15 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg) */ bh_lock_sock(sk); - if (!sock_owned_by_user(sk) && sk->sk_state == SS_CONNECTED) - vmci_trans(vsk)->notify_ops->handle_notify_pkt( - sk, pkt, true, &dst, &src, - &bh_process_pkt); + if (!sock_owned_by_user(sk)) { + /* The local context ID may be out of date, update it. */ + vsk->local_addr.svm_cid = dst.svm_cid; + + if (sk->sk_state == SS_CONNECTED) + vmci_trans(vsk)->notify_ops->handle_notify_pkt( + sk, pkt, true, &dst, &src, + &bh_process_pkt); + } bh_unlock_sock(sk); @@ -912,6 +914,9 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work) lock_sock(sk); + /* The local context ID may be out of date. */ + vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context; + switch (sk->sk_state) { case SS_LISTEN: vmci_transport_recv_listen(sk, pkt); @@ -968,6 +973,10 @@ static int vmci_transport_recv_listen(struct sock *sk, pending = vmci_transport_get_pending(sk, pkt); if (pending) { lock_sock(pending); + + /* The local context ID may be out of date. */ + vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context; + switch (pending->sk_state) { case SS_CONNECTING: err = vmci_transport_recv_connecting_server(sk, @@ -1737,6 +1746,8 @@ static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, if (flags & MSG_OOB || flags & MSG_ERRQUEUE) return -EOPNOTSUPP; + msg->msg_namelen = 0; + /* Retrieve the head sk_buff from the socket's receive queue. */ err = 0; skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); @@ -1769,7 +1780,6 @@ static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, if (err) goto out; - msg->msg_namelen = 0; if (msg->msg_name) { struct sockaddr_vm *vm_addr; diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c index b7df1aea7c59..ec2611b4ea0e 100644 --- a/net/vmw_vsock/vsock_addr.c +++ b/net/vmw_vsock/vsock_addr.c @@ -64,16 +64,6 @@ bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, } EXPORT_SYMBOL_GPL(vsock_addr_equals_addr); -bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr, - const struct sockaddr_vm *other) -{ - return (addr->svm_cid == VMADDR_CID_ANY || - other->svm_cid == VMADDR_CID_ANY || - addr->svm_cid == other->svm_cid) && - addr->svm_port == other->svm_port; -} -EXPORT_SYMBOL_GPL(vsock_addr_equals_addr_any); - int vsock_addr_cast(const struct sockaddr *addr, size_t len, struct sockaddr_vm **out_addr) { diff --git a/net/vmw_vsock/vsock_addr.h b/net/vmw_vsock/vsock_addr.h index cdfbcefdf843..9ccd5316eac0 100644 --- a/net/vmw_vsock/vsock_addr.h +++ b/net/vmw_vsock/vsock_addr.h @@ -24,8 +24,6 @@ bool vsock_addr_bound(const struct sockaddr_vm *addr); void vsock_addr_unbind(struct sockaddr_vm *addr); bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, const struct sockaddr_vm *other); -bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr, - const struct sockaddr_vm *other); int vsock_addr_cast(const struct sockaddr *addr, size_t len, struct sockaddr_vm **out_addr); diff --git a/net/wireless/core.c b/net/wireless/core.c index f382cae983ba..84c9ad7e1dca 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -212,6 +212,39 @@ static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) rdev_rfkill_poll(rdev); } +void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + lockdep_assert_held(&rdev->devlist_mtx); + lockdep_assert_held(&rdev->sched_scan_mtx); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)) + return; + + if (!wdev->p2p_started) + return; + + rdev_stop_p2p_device(rdev, wdev); + wdev->p2p_started = false; + + rdev->opencount--; + + if (rdev->scan_req && rdev->scan_req->wdev == wdev) { + bool busy = work_busy(&rdev->scan_done_wk); + + /* + * If the work isn't pending or running (in which case it would + * be waiting for the lock we hold) the driver didn't properly + * cancel the scan when the interface was removed. In this case + * warn and leak the scan request object to not crash later. + */ + WARN_ON(!busy); + + rdev->scan_req->aborted = true; + ___cfg80211_scan_done(rdev, !busy); + } +} + static int cfg80211_rfkill_set_block(void *data, bool blocked) { struct cfg80211_registered_device *rdev = data; @@ -221,7 +254,8 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) return 0; rtnl_lock(); - mutex_lock(&rdev->devlist_mtx); + + /* read-only iteration need not hold the devlist_mtx */ list_for_each_entry(wdev, &rdev->wdev_list, list) { if (wdev->netdev) { @@ -231,18 +265,18 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) /* otherwise, check iftype */ switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: - if (!wdev->p2p_started) - break; - rdev_stop_p2p_device(rdev, wdev); - wdev->p2p_started = false; - rdev->opencount--; + /* but this requires it */ + mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); + cfg80211_stop_p2p_device(rdev, wdev); + mutex_unlock(&rdev->sched_scan_mtx); + mutex_unlock(&rdev->devlist_mtx); break; default: break; } } - mutex_unlock(&rdev->devlist_mtx); rtnl_unlock(); return 0; @@ -745,17 +779,13 @@ static void wdev_cleanup_work(struct work_struct *work) wdev = container_of(work, struct wireless_dev, cleanup_work); rdev = wiphy_to_dev(wdev->wiphy); - cfg80211_lock_rdev(rdev); + mutex_lock(&rdev->sched_scan_mtx); if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) { rdev->scan_req->aborted = true; ___cfg80211_scan_done(rdev, true); } - cfg80211_unlock_rdev(rdev); - - mutex_lock(&rdev->sched_scan_mtx); - if (WARN_ON(rdev->sched_scan_req && rdev->sched_scan_req->dev == wdev->netdev)) { __cfg80211_stop_sched_scan(rdev, false); @@ -781,21 +811,19 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) return; mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); list_del_rcu(&wdev->list); rdev->devlist_generation++; switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: - if (!wdev->p2p_started) - break; - rdev_stop_p2p_device(rdev, wdev); - wdev->p2p_started = false; - rdev->opencount--; + cfg80211_stop_p2p_device(rdev, wdev); break; default: WARN_ON_ONCE(1); break; } + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); } EXPORT_SYMBOL(cfg80211_unregister_wdev); @@ -945,6 +973,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, cfg80211_update_iface_num(rdev, wdev->iftype, 1); cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); switch (wdev->iftype) { #ifdef CONFIG_CFG80211_WEXT @@ -976,6 +1005,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; } wdev_unlock(wdev); + mutex_unlock(&rdev->sched_scan_mtx); rdev->opencount++; mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); @@ -1096,8 +1126,10 @@ static int __init cfg80211_init(void) goto out_fail_reg; cfg80211_wq = create_singlethread_workqueue("cfg80211"); - if (!cfg80211_wq) + if (!cfg80211_wq) { + err = -ENOMEM; goto out_fail_wq; + } return 0; diff --git a/net/wireless/core.h b/net/wireless/core.h index d5d06fdea961..124e5e773fbc 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -503,6 +503,9 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, void cfg80211_leave(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); +void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); + #define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10 #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f924d45af1b8..671b69a3c136 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5048,14 +5048,19 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->scan) return -EOPNOTSUPP; - if (rdev->scan_req) - return -EBUSY; + mutex_lock(&rdev->sched_scan_mtx); + if (rdev->scan_req) { + err = -EBUSY; + goto unlock; + } if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { n_channels = validate_scan_freqs( info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]); - if (!n_channels) - return -EINVAL; + if (!n_channels) { + err = -EINVAL; + goto unlock; + } } else { enum ieee80211_band band; n_channels = 0; @@ -5069,23 +5074,29 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) n_ssids++; - if (n_ssids > wiphy->max_scan_ssids) - return -EINVAL; + if (n_ssids > wiphy->max_scan_ssids) { + err = -EINVAL; + goto unlock; + } if (info->attrs[NL80211_ATTR_IE]) ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); else ie_len = 0; - if (ie_len > wiphy->max_scan_ie_len) - return -EINVAL; + if (ie_len > wiphy->max_scan_ie_len) { + err = -EINVAL; + goto unlock; + } request = kzalloc(sizeof(*request) + sizeof(*request->ssids) * n_ssids + sizeof(*request->channels) * n_channels + ie_len, GFP_KERNEL); - if (!request) - return -ENOMEM; + if (!request) { + err = -ENOMEM; + goto unlock; + } if (n_ssids) request->ssids = (void *)&request->channels[n_channels]; @@ -5222,6 +5233,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) kfree(request); } + unlock: + mutex_unlock(&rdev->sched_scan_mtx); return err; } @@ -8130,20 +8143,9 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->stop_p2p_device) return -EOPNOTSUPP; - if (!wdev->p2p_started) - return 0; - - rdev_stop_p2p_device(rdev, wdev); - wdev->p2p_started = false; - - mutex_lock(&rdev->devlist_mtx); - rdev->opencount--; - mutex_unlock(&rdev->devlist_mtx); - - if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) { - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev, true); - } + mutex_lock(&rdev->sched_scan_mtx); + cfg80211_stop_p2p_device(rdev, wdev); + mutex_unlock(&rdev->sched_scan_mtx); return 0; } @@ -8929,7 +8931,7 @@ static int nl80211_add_scan_req(struct sk_buff *msg, struct nlattr *nest; int i; - ASSERT_RDEV_LOCK(rdev); + lockdep_assert_held(&rdev->sched_scan_mtx); if (WARN_ON(!req)) return 0; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 674aadca0079..fd99ea495b7e 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -169,7 +169,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) union iwreq_data wrqu; #endif - ASSERT_RDEV_LOCK(rdev); + lockdep_assert_held(&rdev->sched_scan_mtx); request = rdev->scan_req; @@ -230,9 +230,9 @@ void __cfg80211_scan_done(struct work_struct *wk) rdev = container_of(wk, struct cfg80211_registered_device, scan_done_wk); - cfg80211_lock_rdev(rdev); + mutex_lock(&rdev->sched_scan_mtx); ___cfg80211_scan_done(rdev, false); - cfg80211_unlock_rdev(rdev); + mutex_unlock(&rdev->sched_scan_mtx); } void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) @@ -698,11 +698,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, found = rb_find_bss(dev, tmp, BSS_CMP_REGULAR); if (found) { - found->pub.beacon_interval = tmp->pub.beacon_interval; - found->pub.signal = tmp->pub.signal; - found->pub.capability = tmp->pub.capability; - found->ts = tmp->ts; - /* Update IEs */ if (rcu_access_pointer(tmp->pub.proberesp_ies)) { const struct cfg80211_bss_ies *old; @@ -723,6 +718,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, if (found->pub.hidden_beacon_bss && !list_empty(&found->hidden_list)) { + const struct cfg80211_bss_ies *f; + /* * The found BSS struct is one of the probe * response members of a group, but we're @@ -732,6 +729,10 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, * SSID to showing it, which is confusing so * drop this information. */ + + f = rcu_access_pointer(tmp->pub.beacon_ies); + kfree_rcu((struct cfg80211_bss_ies *)f, + rcu_head); goto drop; } @@ -761,6 +762,11 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); } + + found->pub.beacon_interval = tmp->pub.beacon_interval; + found->pub.signal = tmp->pub.signal; + found->pub.capability = tmp->pub.capability; + found->ts = tmp->ts; } else { struct cfg80211_internal_bss *new; struct cfg80211_internal_bss *hidden; @@ -1056,6 +1062,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, if (IS_ERR(rdev)) return PTR_ERR(rdev); + mutex_lock(&rdev->sched_scan_mtx); if (rdev->scan_req) { err = -EBUSY; goto out; @@ -1162,6 +1169,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, dev_hold(dev); } out: + mutex_unlock(&rdev->sched_scan_mtx); kfree(creq); cfg80211_unlock_rdev(rdev); return err; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index bad4c4b5e4eb..a9dc5c736df0 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -85,6 +85,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) ASSERT_RTNL(); ASSERT_RDEV_LOCK(rdev); ASSERT_WDEV_LOCK(wdev); + lockdep_assert_held(&rdev->sched_scan_mtx); if (rdev->scan_req) return -EBUSY; @@ -227,6 +228,7 @@ void cfg80211_conn_work(struct work_struct *work) rtnl_lock(); cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); list_for_each_entry(wdev, &rdev->wdev_list, list) { wdev_lock(wdev); @@ -234,7 +236,7 @@ void cfg80211_conn_work(struct work_struct *work) wdev_unlock(wdev); continue; } - if (wdev->sme_state != CFG80211_SME_CONNECTING) { + if (wdev->sme_state != CFG80211_SME_CONNECTING || !wdev->conn) { wdev_unlock(wdev); continue; } @@ -251,6 +253,7 @@ void cfg80211_conn_work(struct work_struct *work) wdev_unlock(wdev); } + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); rtnl_unlock(); @@ -324,11 +327,9 @@ void cfg80211_sme_scan_done(struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; - mutex_lock(&wiphy_to_dev(wdev->wiphy)->devlist_mtx); wdev_lock(wdev); __cfg80211_sme_scan_done(dev); wdev_unlock(wdev); - mutex_unlock(&wiphy_to_dev(wdev->wiphy)->devlist_mtx); } void cfg80211_sme_rx_auth(struct net_device *dev, @@ -928,9 +929,12 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, int err; mutex_lock(&rdev->devlist_mtx); + /* might request scan - scan_mtx -> wdev_mtx dependency */ + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(dev->ieee80211_ptr); err = __cfg80211_connect(rdev, dev, connect, connkeys, NULL); wdev_unlock(dev->ieee80211_ptr); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); return err; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index ccadef2106ac..3c2033b8f596 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -27,7 +27,8 @@ #define WIPHY_PR_ARG __entry->wiphy_name #define WDEV_ENTRY __field(u32, id) -#define WDEV_ASSIGN (__entry->id) = (wdev ? wdev->identifier : 0) +#define WDEV_ASSIGN (__entry->id) = (!IS_ERR_OR_NULL(wdev) \ + ? wdev->identifier : 0) #define WDEV_PR_FMT "wdev(%u)" #define WDEV_PR_ARG (__entry->id) @@ -1778,7 +1779,7 @@ TRACE_EVENT(rdev_set_mac_acl, ), TP_fast_assign( WIPHY_ASSIGN; - WIPHY_ASSIGN; + NETDEV_ASSIGN; __entry->acl_policy = params->acl_policy; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", acl policy: %d", diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index fb9622f6d99c..e79cb5c0655a 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -89,6 +89,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) { @@ -135,6 +136,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); return err; @@ -190,6 +192,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); err = 0; @@ -223,6 +226,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); return err; @@ -285,6 +289,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) { @@ -313,6 +318,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); return err; diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 35754cc8a9e5..8dafe6d3c6e4 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -334,6 +334,70 @@ static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) x->xflags &= ~XFRM_TIME_DEFER; } +static void xfrm_replay_notify_esn(struct xfrm_state *x, int event) +{ + u32 seq_diff, oseq_diff; + struct km_event c; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct xfrm_replay_state_esn *preplay_esn = x->preplay_esn; + + /* we send notify messages in case + * 1. we updated on of the sequence numbers, and the seqno difference + * is at least x->replay_maxdiff, in this case we also update the + * timeout of our timer function + * 2. if x->replay_maxage has elapsed since last update, + * and there were changes + * + * The state structure must be locked! + */ + + switch (event) { + case XFRM_REPLAY_UPDATE: + if (!x->replay_maxdiff) + break; + + if (replay_esn->seq_hi == preplay_esn->seq_hi) + seq_diff = replay_esn->seq - preplay_esn->seq; + else + seq_diff = ~preplay_esn->seq + replay_esn->seq + 1; + + if (replay_esn->oseq_hi == preplay_esn->oseq_hi) + oseq_diff = replay_esn->oseq - preplay_esn->oseq; + else + oseq_diff = ~preplay_esn->oseq + replay_esn->oseq + 1; + + if (seq_diff < x->replay_maxdiff && + oseq_diff < x->replay_maxdiff) { + + if (x->xflags & XFRM_TIME_DEFER) + event = XFRM_REPLAY_TIMEOUT; + else + return; + } + + break; + + case XFRM_REPLAY_TIMEOUT: + if (memcmp(x->replay_esn, x->preplay_esn, + xfrm_replay_state_esn_len(replay_esn)) == 0) { + x->xflags |= XFRM_TIME_DEFER; + return; + } + + break; + } + + memcpy(x->preplay_esn, x->replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + c.event = XFRM_MSG_NEWAE; + c.data.aevent = event; + km_state_notify(x, &c); + + if (x->replay_maxage && + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) + x->xflags &= ~XFRM_TIME_DEFER; +} + static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb) { int err = 0; @@ -510,7 +574,7 @@ static struct xfrm_replay xfrm_replay_esn = { .advance = xfrm_replay_advance_esn, .check = xfrm_replay_check_esn, .recheck = xfrm_replay_recheck_esn, - .notify = xfrm_replay_notify_bmp, + .notify = xfrm_replay_notify_esn, .overflow = xfrm_replay_overflow_esn, }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 2c341bdaf47c..78f66fa92449 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1187,6 +1187,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) goto error; x->props.flags = orig->props.flags; + x->props.extra_flags = orig->props.extra_flags; x->curlft.add_time = orig->curlft.add_time; x->km.state = orig->km.state; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index fbd9e6cd0fd7..aa778748c565 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -515,6 +515,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, copy_from_user_state(x, p); + if (attrs[XFRMA_SA_EXTRA_FLAGS]) + x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]); + if ((err = attach_aead(&x->aead, &x->props.ealgo, attrs[XFRMA_ALG_AEAD]))) goto error; @@ -779,6 +782,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x, copy_to_user_state(x, p); + if (x->props.extra_flags) { + ret = nla_put_u32(skb, XFRMA_SA_EXTRA_FLAGS, + x->props.extra_flags); + if (ret) + goto out; + } + if (x->coaddr) { ret = nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); if (ret) @@ -2302,9 +2312,10 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) }, [XFRMA_TFCPAD] = { .type = NLA_U32 }, [XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) }, + [XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 }, }; -static struct xfrm_link { +static const struct xfrm_link { int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); int (*dump)(struct sk_buff *, struct netlink_callback *); int (*done)(struct netlink_callback *); @@ -2338,7 +2349,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *attrs[XFRMA_MAX+1]; - struct xfrm_link *link; + const struct xfrm_link *link; int type, err; type = nlh->nlmsg_type; @@ -2495,6 +2506,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) x->security->ctx_len); if (x->coaddr) l += nla_total_size(sizeof(*x->coaddr)); + if (x->props.extra_flags) + l += nla_total_size(sizeof(x->props.extra_flags)); /* Must count x->lastused as it may become non-zero behind our back. */ l += nla_total_size(sizeof(u64)); |