aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ben Hutchings <ben@decadent.org.uk> 2015-12-15 17:40:55 +0000
committer Ben Hutchings <ben@decadent.org.uk> 2015-12-15 17:40:55 +0000
commit c4e89babe4aef33abcd443d784bd6535d02401dd (patch)
tree 1eab4fc86366c1c12e161a957442bf1884460b3c
parent d2de7ad896a9ed11c5e9dfcde45d692ab0fe82f9 (diff)
download kernel_replicant_linux-c4e89babe4aef33abcd443d784bd6535d02401dd.tar.gz
kernel_replicant_linux-c4e89babe4aef33abcd443d784bd6535d02401dd.tar.bz2
kernel_replicant_linux-c4e89babe4aef33abcd443d784bd6535d02401dd.zip
Update to 4.3.3
Drop 3 security fixes that were included in it.
-rw-r--r-- debian/changelog 71
-rw-r--r-- debian/patches/bugfix/all/btrfs-fix-truncation-of-compressed-and-inlined-exten.patch 283
-rw-r--r-- debian/patches/bugfix/all/rds-fix-race-condition-when-sending-a-message-on-unbound-socket.patch 69
-rw-r--r-- debian/patches/bugfix/all/unix-avoid-use-after-free-in-ep_remove_wait_queue.patch 325
-rw-r--r-- debian/patches/series 3
5 files changed, 70 insertions, 681 deletions
diff --git a/debian/changelog b/debian/changelog
index 8291d33788a2..3b46b76c7d8a 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,5 +1,74 @@
-linux (4.3.1-2) UNRELEASED; urgency=medium
+linux (4.3.3-1) UNRELEASED; urgency=medium
+ * New upstream stable update:
+ https://www.kernel.org/pub/linux/kernel/v4.x/ChangeLog-4.3.2
+ - X.509: Fix the time validation [ver #2]
+ https://www.kernel.org/pub/linux/kernel/v4.x/ChangeLog-4.3.3
+ - r8169: fix kasan reported skb use-after-free. (regression in 4.3)
+ - af-unix: fix use-after-free with concurrent readers while splicing
+ (regression in 4.2)
+ - af_unix: don't append consumed skbs to sk_receive_queue
+ (regression in 4.2)
+ - af_unix: take receive queue lock while appending new skb
+ (regression in 4.2)
+ - af-unix: passcred support for sendpage (regression in 4.2)
+ - ipv6: Avoid creating RTF_CACHE from a rt that is not managed by fib6 tree
+ (regression in 4.2)
+ - ipv6: Check expire on DST_NOCACHE route
+ - ipv6: Check rt->dst.from for the DST_NOCACHE route (regression in 4.3)
+ - Revert "ipv6: ndisc: inherit metadata dst when creating ndisc requests"
+ (regression in 4.3)
+ - packet: only allow extra vlan len on ethernet devices
+ - packet: infer protocol from ethernet header if unset
+ - packet: fix tpacket_snd max frame len
+ - sctp: translate host order to network order when setting a hmacid
+ - net/mlx5e: Added self loopback prevention (regression in 4.3)
+ - net/mlx4_core: Fix sleeping while holding spinlock at rem_slave_counters
+ (regression in 4.2)
+ - ip_tunnel: disable preemption when updating per-cpu tstats
+ - net/ip6_tunnel: fix dst leak (regression in 4.3)
+ - tcp: disable Fast Open on timeouts after handshake
+ - tcp: fix potential huge kmalloc() calls in TCP_REPAIR
+ - tcp: initialize tp->copied_seq in case of cross SYN connection
+ - net, scm: fix PaX detected msg_controllen overflow in scm_detach_fds
+ - net: ipmr: fix static mfc/dev leaks on table destruction
+ - net: ip6mr: fix static mfc/dev leaks on table destruction
+ - vrf: fix double free and memory corruption on register_netdevice failure
+ - tipc: fix error handling of expanding buffer headroom (regression in 4.3)
+ - ipv6: distinguish frag queues by device for multicast and link-local
+ packets
+ - bpf, array: fix heap out-of-bounds access when updating elements
+ - ipv6: add complete rcu protection around np->opt
+ - net/neighbour: fix crash at dumping device-agnostic proxy entries
+ - ipv6: sctp: implement sctp_v6_destroy_sock()
+ - openvswitch: fix hangup on vxlan/gre/geneve device deletion
+ - net_sched: fix qdisc_tree_decrease_qlen() races
+ - btrfs: fix resending received snapshot with parent (regression in 4.2)
+ - Btrfs: fix file corruption and data loss after cloning inline extents
+ - Btrfs: fix regression when running delayed references (regression in 4.2)
+ - Btrfs: fix race leading to incorrect item deletion when dropping extents
+ - Btrfs: fix race leading to BUG_ON when running delalloc for nodatacow
+ - Btrfs: fix race when listing an inode's xattrs
+ - rbd: don't put snap_context twice in rbd_queue_workfn()
+ - ext4 crypto: fix memory leak in ext4_bio_write_page()
+ - ext4 crypto: fix bugs in ext4_encrypted_zeroout()
+ - ext4: fix potential use after free in __ext4_journal_stop
+ (regression in 4.2)
+ - ext4, jbd2: ensure entering into panic after recording an error in
+ superblock
+ - nfsd: serialize state seqid morphing operations
+ - nfsd: eliminate sending duplicate and repeated delegations
+ - nfs4: start callback_ident at idr 1
+ - nfs4: resend LAYOUTGET when there is a race that changes the seqid
+ - nfs: if we have no valid attrs, then don't declare the attribute cache
+ valid
+ - ocfs2: fix umask ignored issue
+ - block: fix segment split (regression in 4.3)
+ - ceph: fix message length computation
+ - Btrfs: fix regression running delayed references when using qgroups
+ (regression in 4.2)
+
+ [ Ben Hutchings ]
* net: add validation for the socket syscall protocol argument (CVE-2015-8543)
* [armel/kirkwood] udeb: Override inclusion of gpio_keys in input-modules
(fixes FTBFS)
diff --git a/debian/patches/bugfix/all/btrfs-fix-truncation-of-compressed-and-inlined-exten.patch b/debian/patches/bugfix/all/btrfs-fix-truncation-of-compressed-and-inlined-exten.patch
deleted file mode 100644
index 7cd84014ddf2..000000000000
--- a/debian/patches/bugfix/all/btrfs-fix-truncation-of-compressed-and-inlined-exten.patch
+++ /dev/null
@@ -1,283 +0,0 @@
-From: Filipe Manana <fdmanana@suse.com>
-Date: Fri, 16 Oct 2015 12:34:25 +0100
-Subject: Btrfs: fix truncation of compressed and inlined extents
-Origin: https://git.kernel.org/linus/0305cd5f7fca85dae392b9ba85b116896eb7c1c7
-
-When truncating a file to a smaller size which consists of an inline
-extent that is compressed, we did not discard (or made unusable) the
-data between the new file size and the old file size, wasting metadata
-space and allowing for the truncated data to be leaked and the data
-corruption/loss mentioned below.
-We were also not correctly decrementing the number of bytes used by the
-inode, we were setting it to zero, giving a wrong report for callers of
-the stat(2) syscall. The fsck tool also reported an error about a mismatch
-between the nbytes of the file versus the real space used by the file.
-
-Now because we weren't discarding the truncated region of the file, it
-was possible for a caller of the clone ioctl to actually read the data
-that was truncated, allowing for a security breach without requiring root
-access to the system, using only standard filesystem operations. The
-scenario is the following:
-
- 1) User A creates a file which consists of an inline and compressed
- extent with a size of 2000 bytes - the file is not accessible to
- any other users (no read, write or execution permission for anyone
- else);
-
- 2) The user truncates the file to a size of 1000 bytes;
-
- 3) User A makes the file world readable;
-
- 4) User B creates a file consisting of an inline extent of 2000 bytes;
-
- 5) User B issues a clone operation from user A's file into its own
- file (using a length argument of 0, clone the whole range);
-
- 6) User B now gets to see the 1000 bytes that user A truncated from
- its file before it made its file world readable. User B also lost
- the bytes in the range [1000, 2000[ bytes from its own file, but
- that might be ok if his/her intention was reading stale data from
- user A that was never supposed to be public.
-
-Note that this contrasts with the case where we truncate a file from 2000
-bytes to 1000 bytes and then truncate it back from 1000 to 2000 bytes. In
-this case reading any byte from the range [1000, 2000[ will return a value
-of 0x00, instead of the original data.
-
-This problem exists since the clone ioctl was added and happens both with
-and without my recent data loss and file corruption fixes for the clone
-ioctl (patch "Btrfs: fix file corruption and data loss after cloning
-inline extents").
-
-So fix this by truncating the compressed inline extents as we do for the
-non-compressed case, which involves decompressing, if the data isn't already
-in the page cache, compressing the truncated version of the extent, writing
-the compressed content into the inline extent and then truncate it.
-
-The following test case for fstests reproduces the problem. In order for
-the test to pass both this fix and my previous fix for the clone ioctl
-that forbids cloning a smaller inline extent into a larger one,
-which is titled "Btrfs: fix file corruption and data loss after cloning
-inline extents", are needed. Without that other fix the test fails in a
-different way that does not leak the truncated data, instead part of
-destination file gets replaced with zeroes (because the destination file
-has a larger inline extent than the source).
-
- seq=`basename $0`
- seqres=$RESULT_DIR/$seq
- echo "QA output created by $seq"
- tmp=/tmp/$$
- status=1 # failure is the default!
- trap "_cleanup; exit \$status" 0 1 2 3 15
-
- _cleanup()
- {
- rm -f $tmp.*
- }
-
- # get standard environment, filters and checks
- . ./common/rc
- . ./common/filter
-
- # real QA test starts here
- _need_to_be_root
- _supported_fs btrfs
- _supported_os Linux
- _require_scratch
- _require_cloner
-
- rm -f $seqres.full
-
- _scratch_mkfs >>$seqres.full 2>&1
- _scratch_mount "-o compress"
-
- # Create our test files. File foo is going to be the source of a clone operation
- # and consists of a single inline extent with an uncompressed size of 512 bytes,
- # while file bar consists of a single inline extent with an uncompressed size of
- # 256 bytes. For our test's purpose, it's important that file bar has an inline
- # extent with a size smaller than foo's inline extent.
- $XFS_IO_PROG -f -c "pwrite -S 0xa1 0 128" \
- -c "pwrite -S 0x2a 128 384" \
- $SCRATCH_MNT/foo | _filter_xfs_io
- $XFS_IO_PROG -f -c "pwrite -S 0xbb 0 256" $SCRATCH_MNT/bar | _filter_xfs_io
-
- # Now durably persist all metadata and data. We do this to make sure that we get
- # on disk an inline extent with a size of 512 bytes for file foo.
- sync
-
- # Now truncate our file foo to a smaller size. Because it consists of a
- # compressed and inline extent, btrfs did not shrink the inline extent to the
- # new size (if the extent was not compressed, btrfs would shrink it to 128
- # bytes), it only updates the inode's i_size to 128 bytes.
- $XFS_IO_PROG -c "truncate 128" $SCRATCH_MNT/foo
-
- # Now clone foo's inline extent into bar.
- # This clone operation should fail with errno EOPNOTSUPP because the source
- # file consists only of an inline extent and the file's size is smaller than
- # the inline extent of the destination (128 bytes < 256 bytes). However the
- # clone ioctl was not prepared to deal with a file that has a size smaller
- # than the size of its inline extent (something that happens only for compressed
- # inline extents), resulting in copying the full inline extent from the source
- # file into the destination file.
- #
- # Note that btrfs' clone operation for inline extents consists of removing the
- # inline extent from the destination inode and copy the inline extent from the
- # source inode into the destination inode, meaning that if the destination
- # inode's inline extent is larger (N bytes) than the source inode's inline
- # extent (M bytes), some bytes (N - M bytes) will be lost from the destination
- # file. Btrfs could copy the source inline extent's data into the destination's
- # inline extent so that we would not lose any data, but that's currently not
- # done due to the complexity that would be needed to deal with such cases
- # (specially when one or both extents are compressed), returning EOPNOTSUPP, as
- # it's normally not a very common case to clone very small files (only case
- # where we get inline extents) and copying inline extents does not save any
- # space (unlike for normal, non-inlined extents).
- $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/foo $SCRATCH_MNT/bar
-
- # Now because the above clone operation used to succeed, and due to foo's inline
- # extent not being shrunk by the truncate operation, our file bar got the whole
- # inline extent copied from foo, making us lose the last 128 bytes from bar
- # which got replaced by the bytes in range [128, 256[ from foo before foo was
- # truncated - in other words, data loss from bar and being able to read old and
- # stale data from foo that should not be possible to read anymore through normal
- # filesystem operations. Contrast with the case where we truncate a file from a
- # size N to a smaller size M, truncate it back to size N and then read the range
- # [M, N[, we should always get the value 0x00 for all the bytes in that range.
-
- # We expected the clone operation to fail with errno EOPNOTSUPP and therefore
- # not modify our file's bar data/metadata. So its content should be 256 bytes
- # long with all bytes having the value 0xbb.
- #
- # Without the btrfs bug fix, the clone operation succeeded and resulted in
- # leaking truncated data from foo, the bytes that belonged to its range
- # [128, 256[, and losing data from bar in that same range. So reading the
- # file gave us the following content:
- #
- # 0000000 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1
- # *
- # 0000200 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a
- # *
- # 0000400
- echo "File bar's content after the clone operation:"
- od -t x1 $SCRATCH_MNT/bar
-
- # Also because the foo's inline extent was not shrunk by the truncate
- # operation, btrfs' fsck, which is run by the fstests framework every time a
- # test completes, failed reporting the following error:
- #
- # root 5 inode 257 errors 400, nbytes wrong
-
- status=0
- exit
-
-Cc: stable@vger.kernel.org
-Signed-off-by: Filipe Manana <fdmanana@suse.com>
----
- fs/btrfs/inode.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++----------
- 1 file changed, 68 insertions(+), 14 deletions(-)
-
---- a/fs/btrfs/inode.c
-+++ b/fs/btrfs/inode.c
-@@ -4184,6 +4184,47 @@ static int truncate_space_check(struct b
-
- }
-
-+static int truncate_inline_extent(struct inode *inode,
-+ struct btrfs_path *path,
-+ struct btrfs_key *found_key,
-+ const u64 item_end,
-+ const u64 new_size)
-+{
-+ struct extent_buffer *leaf = path->nodes[0];
-+ int slot = path->slots[0];
-+ struct btrfs_file_extent_item *fi;
-+ u32 size = (u32)(new_size - found_key->offset);
-+ struct btrfs_root *root = BTRFS_I(inode)->root;
-+
-+ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
-+
-+ if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) {
-+ loff_t offset = new_size;
-+ loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE);
-+
-+ /*
-+ * Zero out the remaining of the last page of our inline extent,
-+ * instead of directly truncating our inline extent here - that
-+ * would be much more complex (decompressing all the data, then
-+ * compressing the truncated data, which might be bigger than
-+ * the size of the inline extent, resize the extent, etc).
-+ * We release the path because to get the page we might need to
-+ * read the extent item from disk (data not in the page cache).
-+ */
-+ btrfs_release_path(path);
-+ return btrfs_truncate_page(inode, offset, page_end - offset, 0);
-+ }
-+
-+ btrfs_set_file_extent_ram_bytes(leaf, fi, size);
-+ size = btrfs_file_extent_calc_inline_size(size);
-+ btrfs_truncate_item(root, path, size, 1);
-+
-+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
-+ inode_sub_bytes(inode, item_end + 1 - new_size);
-+
-+ return 0;
-+}
-+
- /*
- * this can truncate away extent items, csum items and directory items.
- * It starts at a high offset and removes keys until it can't find
-@@ -4378,27 +4419,40 @@ search_again:
- * special encodings
- */
- if (!del_item &&
-- btrfs_file_extent_compression(leaf, fi) == 0 &&
- btrfs_file_extent_encryption(leaf, fi) == 0 &&
- btrfs_file_extent_other_encoding(leaf, fi) == 0) {
-- u32 size = new_size - found_key.offset;
--
-- if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
-- inode_sub_bytes(inode, item_end + 1 -
-- new_size);
-
- /*
-- * update the ram bytes to properly reflect
-- * the new size of our item
-+ * Need to release path in order to truncate a
-+ * compressed extent. So delete any accumulated
-+ * extent items so far.
- */
-- btrfs_set_file_extent_ram_bytes(leaf, fi, size);
-- size =
-- btrfs_file_extent_calc_inline_size(size);
-- btrfs_truncate_item(root, path, size, 1);
-+ if (btrfs_file_extent_compression(leaf, fi) !=
-+ BTRFS_COMPRESS_NONE && pending_del_nr) {
-+ err = btrfs_del_items(trans, root, path,
-+ pending_del_slot,
-+ pending_del_nr);
-+ if (err) {
-+ btrfs_abort_transaction(trans,
-+ root,
-+ err);
-+ goto error;
-+ }
-+ pending_del_nr = 0;
-+ }
-+
-+ err = truncate_inline_extent(inode, path,
-+ &found_key,
-+ item_end,
-+ new_size);
-+ if (err) {
-+ btrfs_abort_transaction(trans,
-+ root, err);
-+ goto error;
-+ }
- } else if (test_bit(BTRFS_ROOT_REF_COWS,
- &root->state)) {
-- inode_sub_bytes(inode, item_end + 1 -
-- found_key.offset);
-+ inode_sub_bytes(inode, item_end + 1 - new_size);
- }
- }
- delete:
diff --git a/debian/patches/bugfix/all/rds-fix-race-condition-when-sending-a-message-on-unbound-socket.patch b/debian/patches/bugfix/all/rds-fix-race-condition-when-sending-a-message-on-unbound-socket.patch
deleted file mode 100644
index 299242e21dba..000000000000
--- a/debian/patches/bugfix/all/rds-fix-race-condition-when-sending-a-message-on-unbound-socket.patch
+++ /dev/null
@@ -1,69 +0,0 @@
-From: Quentin Casasnovas <quentin.casasnovas@oracle.com>
-Subject: RDS: fix race condition when sending a message on unbound socket.
-Date: Fri, 16 Oct 2015 17:11:42 +0200
-Origin: https://lkml.org/lkml/2015/10/16/530
-
-Sasha's found a NULL pointer dereference in the RDS connection code when
-sending a message to an apparently unbound socket. The problem is caused
-by the code checking if the socket is bound in rds_sendmsg(), which checks
-the rs_bound_addr field without taking a lock on the socket. This opens a
-race where rs_bound_addr is temporarily set but where the transport is not
-in rds_bind(), leading to a NULL pointer dereference when trying to
-dereference 'trans' in __rds_conn_create().
-
-Vegard wrote a reproducer for this issue, so kindly ask him to share if
-you're interested.
-
-I cannot reproduce the NULL pointer dereference using Vegard's reproducer
-with this patch, whereas I could without.
-
-Complete earlier incomplete fix to CVE-2015-6937:
-
- 74e98eb08588 ("RDS: verify the underlying transport exists before creating a connection")
-
-Signed-off-by: Quentin Casasnovas <quentin.casasnovas@oracle.com>
-Reviewed-by: Vegard Nossum <vegard.nossum@oracle.com>
-Reviewed-by: Sasha Levin <sasha.levin@oracle.com>
-Cc: Vegard Nossum <vegard.nossum@oracle.com>
-Cc: Sasha Levin <sasha.levin@oracle.com>
-Cc: Chien Yen <chien.yen@oracle.com>
-Cc: Santosh Shilimkar <santosh.shilimkar@oracle.com>
-Cc: David S. Miller <davem@davemloft.net>
-Cc: stable@vger.kernel.org
----
- net/rds/connection.c | 6 ------
- net/rds/send.c | 4 +++-
- 2 files changed, 3 insertions(+), 7 deletions(-)
-
---- a/net/rds/connection.c
-+++ b/net/rds/connection.c
-@@ -190,12 +190,6 @@ new_conn:
- }
- }
-
-- if (trans == NULL) {
-- kmem_cache_free(rds_conn_slab, conn);
-- conn = ERR_PTR(-ENODEV);
-- goto out;
-- }
--
- conn->c_trans = trans;
-
- ret = trans->conn_alloc(conn, gfp);
---- a/net/rds/send.c
-+++ b/net/rds/send.c
-@@ -1009,11 +1009,13 @@ int rds_sendmsg(struct socket *sock, str
- release_sock(sk);
- }
-
-- /* racing with another thread binding seems ok here */
-+ lock_sock(sk);
- if (daddr == 0 || rs->rs_bound_addr == 0) {
-+ release_sock(sk);
- ret = -ENOTCONN; /* XXX not a great errno */
- goto out;
- }
-+ release_sock(sk);
-
- if (payload_len > rds_sk_sndbuf(rs)) {
- ret = -EMSGSIZE;
diff --git a/debian/patches/bugfix/all/unix-avoid-use-after-free-in-ep_remove_wait_queue.patch b/debian/patches/bugfix/all/unix-avoid-use-after-free-in-ep_remove_wait_queue.patch
deleted file mode 100644
index 6fb47c7983f2..000000000000
--- a/debian/patches/bugfix/all/unix-avoid-use-after-free-in-ep_remove_wait_queue.patch
+++ /dev/null
@@ -1,325 +0,0 @@
-From: Rainer Weikusat <rweikusat@mobileactivedefense.com>
-Date: Fri, 20 Nov 2015 22:07:23 +0000
-Subject: unix: avoid use-after-free in ep_remove_wait_queue
-Origin: https://git.kernel.org/cgit/linux/kernel/git/davem/net.git//commit?id=7d267278a9ece963d77eefec61630223fce08c6c
-
-Rainer Weikusat <rweikusat@mobileactivedefense.com> writes:
-An AF_UNIX datagram socket being the client in an n:1 association with
-some server socket is only allowed to send messages to the server if the
-receive queue of this socket contains at most sk_max_ack_backlog
-datagrams. This implies that prospective writers might be forced to go
-to sleep despite none of the message presently enqueued on the server
-receive queue were sent by them. In order to ensure that these will be
-woken up once space becomes again available, the present unix_dgram_poll
-routine does a second sock_poll_wait call with the peer_wait wait queue
-of the server socket as queue argument (unix_dgram_recvmsg does a wake
-up on this queue after a datagram was received). This is inherently
-problematic because the server socket is only guaranteed to remain alive
-for as long as the client still holds a reference to it. In case the
-connection is dissolved via connect or by the dead peer detection logic
-in unix_dgram_sendmsg, the server socket may be freed despite "the
-polling mechanism" (in particular, epoll) still has a pointer to the
-corresponding peer_wait queue. There's no way to forcibly deregister a
-wait queue with epoll.
-
-Based on an idea by Jason Baron, the patch below changes the code such
-that a wait_queue_t belonging to the client socket is enqueued on the
-peer_wait queue of the server whenever the peer receive queue full
-condition is detected by either a sendmsg or a poll. A wake up on the
-peer queue is then relayed to the ordinary wait queue of the client
-socket via wake function. The connection to the peer wait queue is again
-dissolved if either a wake up is about to be relayed or the client
-socket reconnects or a dead peer is detected or the client socket is
-itself closed. This enables removing the second sock_poll_wait from
-unix_dgram_poll, thus avoiding the use-after-free, while still ensuring
-that no blocked writer sleeps forever.
-
-Signed-off-by: Rainer Weikusat <rweikusat@mobileactivedefense.com>
-Fixes: ec0d215f9420 ("af_unix: fix 'poll for write'/connected DGRAM sockets")
-Reviewed-by: Jason Baron <jbaron@akamai.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-[bwh: Backported to 4.2: adjust context]
----
- include/net/af_unix.h | 1 +
- net/unix/af_unix.c | 183 ++++++++++++++++++++++++++++++++++++++++++++------
- 2 files changed, 165 insertions(+), 19 deletions(-)
-
---- a/include/net/af_unix.h
-+++ b/include/net/af_unix.h
-@@ -62,6 +62,7 @@ struct unix_sock {
- #define UNIX_GC_CANDIDATE 0
- #define UNIX_GC_MAYBE_CYCLE 1
- struct socket_wq peer_wq;
-+ wait_queue_t peer_wake;
- };
-
- static inline struct unix_sock *unix_sk(const struct sock *sk)
---- a/net/unix/af_unix.c
-+++ b/net/unix/af_unix.c
-@@ -326,6 +326,118 @@ found:
- return s;
- }
-
-+/* Support code for asymmetrically connected dgram sockets
-+ *
-+ * If a datagram socket is connected to a socket not itself connected
-+ * to the first socket (eg, /dev/log), clients may only enqueue more
-+ * messages if the present receive queue of the server socket is not
-+ * "too large". This means there's a second writeability condition
-+ * poll and sendmsg need to test. The dgram recv code will do a wake
-+ * up on the peer_wait wait queue of a socket upon reception of a
-+ * datagram which needs to be propagated to sleeping would-be writers
-+ * since these might not have sent anything so far. This can't be
-+ * accomplished via poll_wait because the lifetime of the server
-+ * socket might be less than that of its clients if these break their
-+ * association with it or if the server socket is closed while clients
-+ * are still connected to it and there's no way to inform "a polling
-+ * implementation" that it should let go of a certain wait queue
-+ *
-+ * In order to propagate a wake up, a wait_queue_t of the client
-+ * socket is enqueued on the peer_wait queue of the server socket
-+ * whose wake function does a wake_up on the ordinary client socket
-+ * wait queue. This connection is established whenever a write (or
-+ * poll for write) hit the flow control condition and broken when the
-+ * association to the server socket is dissolved or after a wake up
-+ * was relayed.
-+ */
-+
-+static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
-+ void *key)
-+{
-+ struct unix_sock *u;
-+ wait_queue_head_t *u_sleep;
-+
-+ u = container_of(q, struct unix_sock, peer_wake);
-+
-+ __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
-+ q);
-+ u->peer_wake.private = NULL;
-+
-+ /* relaying can only happen while the wq still exists */
-+ u_sleep = sk_sleep(&u->sk);
-+ if (u_sleep)
-+ wake_up_interruptible_poll(u_sleep, key);
-+
-+ return 0;
-+}
-+
-+static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
-+{
-+ struct unix_sock *u, *u_other;
-+ int rc;
-+
-+ u = unix_sk(sk);
-+ u_other = unix_sk(other);
-+ rc = 0;
-+ spin_lock(&u_other->peer_wait.lock);
-+
-+ if (!u->peer_wake.private) {
-+ u->peer_wake.private = other;
-+ __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
-+
-+ rc = 1;
-+ }
-+
-+ spin_unlock(&u_other->peer_wait.lock);
-+ return rc;
-+}
-+
-+static void unix_dgram_peer_wake_disconnect(struct sock *sk,
-+ struct sock *other)
-+{
-+ struct unix_sock *u, *u_other;
-+
-+ u = unix_sk(sk);
-+ u_other = unix_sk(other);
-+ spin_lock(&u_other->peer_wait.lock);
-+
-+ if (u->peer_wake.private == other) {
-+ __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
-+ u->peer_wake.private = NULL;
-+ }
-+
-+ spin_unlock(&u_other->peer_wait.lock);
-+}
-+
-+static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
-+ struct sock *other)
-+{
-+ unix_dgram_peer_wake_disconnect(sk, other);
-+ wake_up_interruptible_poll(sk_sleep(sk),
-+ POLLOUT |
-+ POLLWRNORM |
-+ POLLWRBAND);
-+}
-+
-+/* preconditions:
-+ * - unix_peer(sk) == other
-+ * - association is stable
-+ */
-+static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
-+{
-+ int connected;
-+
-+ connected = unix_dgram_peer_wake_connect(sk, other);
-+
-+ if (unix_recvq_full(other))
-+ return 1;
-+
-+ if (connected)
-+ unix_dgram_peer_wake_disconnect(sk, other);
-+
-+ return 0;
-+}
-+
- static inline int unix_writable(struct sock *sk)
- {
- return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
-@@ -430,6 +542,8 @@ static void unix_release_sock(struct soc
- skpair->sk_state_change(skpair);
- sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
- }
-+
-+ unix_dgram_peer_wake_disconnect(sk, skpair);
- sock_put(skpair); /* It may now die */
- unix_peer(sk) = NULL;
- }
-@@ -664,6 +778,7 @@ static struct sock *unix_create1(struct
- INIT_LIST_HEAD(&u->link);
- mutex_init(&u->readlock); /* single task reading lock */
- init_waitqueue_head(&u->peer_wait);
-+ init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
- unix_insert_socket(unix_sockets_unbound(sk), sk);
- out:
- if (sk == NULL)
-@@ -1031,6 +1146,8 @@ restart:
- if (unix_peer(sk)) {
- struct sock *old_peer = unix_peer(sk);
- unix_peer(sk) = other;
-+ unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
-+
- unix_state_double_unlock(sk, other);
-
- if (other != old_peer)
-@@ -1470,6 +1587,7 @@ static int unix_dgram_sendmsg(struct soc
- struct scm_cookie scm;
- int max_level;
- int data_len = 0;
-+ int sk_locked;
-
- wait_for_unix_gc();
- err = scm_send(sock, msg, &scm, false);
-@@ -1548,12 +1666,14 @@ restart:
- goto out_free;
- }
-
-+ sk_locked = 0;
- unix_state_lock(other);
-+restart_locked:
- err = -EPERM;
- if (!unix_may_send(sk, other))
- goto out_unlock;
-
-- if (sock_flag(other, SOCK_DEAD)) {
-+ if (unlikely(sock_flag(other, SOCK_DEAD))) {
- /*
- * Check with 1003.1g - what should
- * datagram error
-@@ -1561,10 +1681,14 @@ restart:
- unix_state_unlock(other);
- sock_put(other);
-
-+ if (!sk_locked)
-+ unix_state_lock(sk);
-+
- err = 0;
-- unix_state_lock(sk);
- if (unix_peer(sk) == other) {
- unix_peer(sk) = NULL;
-+ unix_dgram_peer_wake_disconnect_wakeup(sk, other);
-+
- unix_state_unlock(sk);
-
- unix_dgram_disconnected(sk, other);
-@@ -1590,21 +1714,38 @@ restart:
- goto out_unlock;
- }
-
-- if (unix_peer(other) != sk && unix_recvq_full(other)) {
-- if (!timeo) {
-- err = -EAGAIN;
-- goto out_unlock;
-+ if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
-+ if (timeo) {
-+ timeo = unix_wait_for_peer(other, timeo);
-+
-+ err = sock_intr_errno(timeo);
-+ if (signal_pending(current))
-+ goto out_free;
-+
-+ goto restart;
- }
-
-- timeo = unix_wait_for_peer(other, timeo);
-+ if (!sk_locked) {
-+ unix_state_unlock(other);
-+ unix_state_double_lock(sk, other);
-+ }
-
-- err = sock_intr_errno(timeo);
-- if (signal_pending(current))
-- goto out_free;
-+ if (unix_peer(sk) != other ||
-+ unix_dgram_peer_wake_me(sk, other)) {
-+ err = -EAGAIN;
-+ sk_locked = 1;
-+ goto out_unlock;
-+ }
-
-- goto restart;
-+ if (!sk_locked) {
-+ sk_locked = 1;
-+ goto restart_locked;
-+ }
- }
-
-+ if (unlikely(sk_locked))
-+ unix_state_unlock(sk);
-+
- if (sock_flag(other, SOCK_RCVTSTAMP))
- __net_timestamp(skb);
- maybe_add_creds(skb, sock, other);
-@@ -1618,6 +1759,8 @@ restart:
- return len;
-
- out_unlock:
-+ if (sk_locked)
-+ unix_state_unlock(sk);
- unix_state_unlock(other);
- out_free:
- kfree_skb(skb);
-@@ -2453,14 +2596,16 @@ static unsigned int unix_dgram_poll(stru
- return mask;
-
- writable = unix_writable(sk);
-- other = unix_peer_get(sk);
-- if (other) {
-- if (unix_peer(other) != sk) {
-- sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
-- if (unix_recvq_full(other))
-- writable = 0;
-- }
-- sock_put(other);
-+ if (writable) {
-+ unix_state_lock(sk);
-+
-+ other = unix_peer(sk);
-+ if (other && unix_peer(other) != sk &&
-+ unix_recvq_full(other) &&
-+ unix_dgram_peer_wake_me(sk, other))
-+ writable = 0;
-+
-+ unix_state_unlock(sk);
- }
-
- if (writable)
diff --git a/debian/patches/series b/debian/patches/series
index 631542793747..c45841638f86 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -85,16 +85,13 @@ bugfix/all/selftests-kprobe-choose-an-always-defined-function-t.patch
bugfix/all/selftests-make-scripts-executable.patch
bugfix/all/selftests-vm-try-harder-to-allocate-huge-pages.patch
bugfix/all/selftests-breakpoints-actually-build-it.patch
-bugfix/all/rds-fix-race-condition-when-sending-a-message-on-unbound-socket.patch
bugfix/all/media-media-vivid-osd-fix-info-leak-in-ioctl.patch
bugfix/x86/kvm-svm-unconditionally-intercept-DB.patch
bugfix/x86/kvm-x86-rename-update_db_bp_intercept-to-update_bp_i.patch
bugfix/all/usbvision-fix-overflow-of-interfaces-array.patch
bugfix/all/media-usbvision-fix-crash-on-detecting-device-with-i.patch
-bugfix/all/unix-avoid-use-after-free-in-ep_remove_wait_queue.patch
bugfix/all/isdn_ppp-add-checks-for-allocation-failure-in-isdn_p.patch
bugfix/all/ppp-slip-validate-vj-compression-slot-parameters-com.patch
-bugfix/all/btrfs-fix-truncation-of-compressed-and-inlined-exten.patch
bugfix/x86/drm-i915-shut-up-gen8-sde-irq-dmesg-noise.patch
bugfix/arm/arm-dts-kirkwood-fix-qnap-ts219-power-off.patch
bugfix/x86/drm-i915-mark-uneven-memory-banks-on-gen4-desktop-as.patch