about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Salvatore Bonaccorso <carnil@debian.org> 2021-03-19 16:51:23 +0100
committer Salvatore Bonaccorso <carnil@debian.org> 2021-03-19 16:51:53 +0100
commit 65e1218127075c33c626e454807a0c64018cece9 (patch)
tree 667d7f11bb536e8ba45816d03099eff6bd1bc33a
parent b04c1cdb3e7a7c4744c644394db8df3f702bdb65 (diff)
download kernel_replicant_linux-65e1218127075c33c626e454807a0c64018cece9.tar.gz
kernel_replicant_linux-65e1218127075c33c626e454807a0c64018cece9.tar.bz2
kernel_replicant_linux-65e1218127075c33c626e454807a0c64018cece9.zip
RDMA/srp: Fix support for unpopulated and unbalanced NUMA nodes
-rw-r--r-- debian/changelog 1
-rw-r--r-- debian/patches/bugfix/all/rdma-srp-fix-support-for-unpopulated-and-unbalanced-numa-nodes.patch 179
-rw-r--r-- debian/patches/series 1
3 files changed, 181 insertions, 0 deletions
diff --git a/debian/changelog b/debian/changelog
index 7f29ad286e47..532379434c8e 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -887,6 +887,7 @@ linux (5.10.24-1) UNRELEASED; urgency=medium
* bpf, selftests: Fix up some test_verifier cases for unprivileged
* [x86] crypto: aesni - Use TEST %reg,%reg instead of CMP $0,%reg
* [x86] crypto: x86/aes-ni-xts - use direct calls to and 4-way stride
+ * RDMA/srp: Fix support for unpopulated and unbalanced NUMA nodes
[ Wookey ]
* [arm64] drivers/perf: Enable ARM_CMN as module (Closes: #981186)
diff --git a/debian/patches/bugfix/all/rdma-srp-fix-support-for-unpopulated-and-unbalanced-numa-nodes.patch b/debian/patches/bugfix/all/rdma-srp-fix-support-for-unpopulated-and-unbalanced-numa-nodes.patch
new file mode 100644
index 000000000000..30e93fa74ce7
--- /dev/null
+++ b/debian/patches/bugfix/all/rdma-srp-fix-support-for-unpopulated-and-unbalanced-numa-nodes.patch
@@ -0,0 +1,179 @@
+From 2b5715fc17386a6223490d5b8f08d031999b0c0b Mon Sep 17 00:00:00 2001
+From: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
+Date: Fri, 5 Feb 2021 09:14:28 +0100
+Subject: RDMA/srp: Fix support for unpopulated and unbalanced NUMA nodes
+
+From: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
+
+commit 2b5715fc17386a6223490d5b8f08d031999b0c0b upstream.
+
+The current code computes a number of channels per SRP target and spreads
+them equally across all online NUMA nodes. Each channel is then assigned
+a CPU within this node.
+
+In the case of unbalanced, or even unpopulated nodes, some channels do not
+get a CPU associated and thus do not get connected. This causes the SRP
+connection to fail.
+
+This patch solves the issue by rewriting channel computation and
+allocation:
+
+- Drop channel to node/CPU association as it had no real effect on
+ locality but added unnecessary complexity.
+
+- Tweak the number of channels allocated to reduce CPU contention when
+ possible:
+ - Up to one channel per CPU (instead of up to 4 by node)
+ - At least 4 channels per node, unless ch_count module parameter is
+ used.
+
+Link: https://lore.kernel.org/r/9cb4d9d3-30ad-2276-7eff-e85f7ddfb411@suse.com
+Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Cc: Yi Zhang <yi.zhang@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/ulp/srp/ib_srp.c | 116 ++++++++++++++----------------------
+ 1 file changed, 48 insertions(+), 68 deletions(-)
+
+--- a/drivers/infiniband/ulp/srp/ib_srp.c
++++ b/drivers/infiniband/ulp/srp/ib_srp.c
+@@ -3624,7 +3624,7 @@ static ssize_t srp_create_target(struct
+ struct srp_rdma_ch *ch;
+ struct srp_device *srp_dev = host->srp_dev;
+ struct ib_device *ibdev = srp_dev->dev;
+- int ret, node_idx, node, cpu, i;
++ int ret, i, ch_idx;
+ unsigned int max_sectors_per_mr, mr_per_cmd = 0;
+ bool multich = false;
+ uint32_t max_iu_len;
+@@ -3749,81 +3749,61 @@ static ssize_t srp_create_target(struct
+ goto out;
+
+ ret = -ENOMEM;
+- if (target->ch_count == 0)
++ if (target->ch_count == 0) {
+ target->ch_count =
+- max_t(unsigned int, num_online_nodes(),
+- min(ch_count ?:
+- min(4 * num_online_nodes(),
+- ibdev->num_comp_vectors),
+- num_online_cpus()));
++ min(ch_count ?:
++ max(4 * num_online_nodes(),
++ ibdev->num_comp_vectors),
++ num_online_cpus());
++ }
++
+ target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
+ GFP_KERNEL);
+ if (!target->ch)
+ goto out;
+
+- node_idx = 0;
+- for_each_online_node(node) {
+- const int ch_start = (node_idx * target->ch_count /
+- num_online_nodes());
+- const int ch_end = ((node_idx + 1) * target->ch_count /
+- num_online_nodes());
+- const int cv_start = node_idx * ibdev->num_comp_vectors /
+- num_online_nodes();
+- const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors /
+- num_online_nodes();
+- int cpu_idx = 0;
+-
+- for_each_online_cpu(cpu) {
+- if (cpu_to_node(cpu) != node)
+- continue;
+- if (ch_start + cpu_idx >= ch_end)
+- continue;
+- ch = &target->ch[ch_start + cpu_idx];
+- ch->target = target;
+- ch->comp_vector = cv_start == cv_end ? cv_start :
+- cv_start + cpu_idx % (cv_end - cv_start);
+- spin_lock_init(&ch->lock);
+- INIT_LIST_HEAD(&ch->free_tx);
+- ret = srp_new_cm_id(ch);
+- if (ret)
+- goto err_disconnect;
+-
+- ret = srp_create_ch_ib(ch);
+- if (ret)
+- goto err_disconnect;
+-
+- ret = srp_alloc_req_data(ch);
+- if (ret)
+- goto err_disconnect;
+-
+- ret = srp_connect_ch(ch, max_iu_len, multich);
+- if (ret) {
+- char dst[64];
+-
+- if (target->using_rdma_cm)
+- snprintf(dst, sizeof(dst), "%pIS",
+- &target->rdma_cm.dst);
+- else
+- snprintf(dst, sizeof(dst), "%pI6",
+- target->ib_cm.orig_dgid.raw);
+- shost_printk(KERN_ERR, target->scsi_host,
+- PFX "Connection %d/%d to %s failed\n",
+- ch_start + cpu_idx,
+- target->ch_count, dst);
+- if (node_idx == 0 && cpu_idx == 0) {
+- goto free_ch;
+- } else {
+- srp_free_ch_ib(target, ch);
+- srp_free_req_data(target, ch);
+- target->ch_count = ch - target->ch;
+- goto connected;
+- }
++ for (ch_idx = 0; ch_idx < target->ch_count; ++ch_idx) {
++ ch = &target->ch[ch_idx];
++ ch->target = target;
++ ch->comp_vector = ch_idx % ibdev->num_comp_vectors;
++ spin_lock_init(&ch->lock);
++ INIT_LIST_HEAD(&ch->free_tx);
++ ret = srp_new_cm_id(ch);
++ if (ret)
++ goto err_disconnect;
++
++ ret = srp_create_ch_ib(ch);
++ if (ret)
++ goto err_disconnect;
++
++ ret = srp_alloc_req_data(ch);
++ if (ret)
++ goto err_disconnect;
++
++ ret = srp_connect_ch(ch, max_iu_len, multich);
++ if (ret) {
++ char dst[64];
++
++ if (target->using_rdma_cm)
++ snprintf(dst, sizeof(dst), "%pIS",
++ &target->rdma_cm.dst);
++ else
++ snprintf(dst, sizeof(dst), "%pI6",
++ target->ib_cm.orig_dgid.raw);
++ shost_printk(KERN_ERR, target->scsi_host,
++ PFX "Connection %d/%d to %s failed\n",
++ ch_idx,
++ target->ch_count, dst);
++ if (ch_idx == 0) {
++ goto free_ch;
++ } else {
++ srp_free_ch_ib(target, ch);
++ srp_free_req_data(target, ch);
++ target->ch_count = ch - target->ch;
++ goto connected;
+ }
+-
+- multich = true;
+- cpu_idx++;
+ }
+- node_idx++;
++ multich = true;
+ }
+
+ connected:
diff --git a/debian/patches/series b/debian/patches/series
index 8a72da81f97f..1cbb9581b331 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -89,6 +89,7 @@ bugfix/all/fs-add-module_softdep-declarations-for-hard-coded-cr.patch
bugfix/all/partially-revert-usb-kconfig-using-select-for-usb_co.patch
debian/makefile-do-not-check-for-libelf-when-building-oot-module.patch
bugfix/all/partially-revert-net-socket-implement-64-bit-timestamps.patch
+bugfix/all/rdma-srp-fix-support-for-unpopulated-and-unbalanced-numa-nodes.patch
# Miscellaneous features