git.openfabrics.org - ~emulex/compat-rdma_3.12.git/commitdiff
Added mlx5 cherry-picks based on upstream commits
author: Vladimir Sokolovsky <vlad@mellanox.com>
Mon, 3 Feb 2014 15:18:36 +0000 (17:18 +0200)
committer: Vladimir Sokolovsky <vlad@mellanox.com>
Mon, 3 Feb 2014 15:18:36 +0000 (17:18 +0200)
57761d8df8efc7cc1227f9bc22e0dda01b0dd91b IB/mlx5: Verify reserved fields are cleared
8c8a49148b95c4d7c5f58a6866a30ea02485d7a3 IB/mlx5: Remove old field for create mkey mailbox
1bde6e301cf6217da9238086c958f532b16e504d IB/mlx5: Abort driver cleanup if teardown hca fails
9e9c47d07d447e09a66ee528c3ebad9ba359af6a IB/mlx5: Allow creation of QPs with zero-length work queues
05bdb2ab6b09f2306f0afe0f60f4b9abffa7aba4 mlx5_core: Fix PowerPC support
db81a5c374b5bd650c5e6ae85d026709751db103 mlx5_core: Improve debugfs readability
bde51583f49bd87e452e9504d489926638046b11 IB/mlx5: Add support for resize CQ
3bdb31f688276505ede23280885948e934304674 IB/mlx5: Implement modify CQ
ada388f7afad1e2e87acbfe30600fdaff9bd6327 IB/mlx5: Make sure doorbell record is visible before doorbell
042b9adae899e1b497282d92205d3fef42d5ca8d mlx5_core: Use mlx5 core style warning
0b6e81b91070bdbe0defb9101384ebb26835e401 IB/mlx5: Clear out struct before create QP command
e08a8761d89b7625144c3fbf0ff9643159135c96 mlx5_core: Fix out arg size in access_register command
c1be5232d21de68f46637e617225b9b7c586451a IB/mlx5: Fix micro UAR allocator
24e42754f676d34e5c26d6b7b30f36df8004ec08 mlx5_core: Remove dead code
d9fe40916387bab884e458c99399c149b033506c IB/mlx5: Remove unused code in mr.c
cf1c5e1f1c965cf44e680127b2e9564fc472676c IB/mlx5: Fix page shift in create CQ for userspace
2b136d025348774633a2f6fc2a87f0cf409a3ab9 IB/mlx5: Fix list_del of empty list
7e2e19210a8bbbcacd31e8ce4a0ea64e3ac37dea IB/mlx5: Remove dead code
1b77d2bd753d119eedcbc08fda58934307676554 mlx5: Use enum to indicate adapter page size
c2a3431e6153ed90911704356bc1e869624e118d IB/mlx5: Update opt param mask for RTS2RTS
07c9113fe8c67e28707b0a4b1e8580abe0327145 IB/mlx5: Remove "Always false" comparison
2d036fad949080f178e02b12c93a61367e9f562f IB/mlx5: Remove dead code in mr.c
4e3d677ba986d5c8e76ee1742c1d4d79bc197d5c mlx5_core: Change optimal_reclaimed_pages for better performance
87b8de492da34942fc554f2958a570ce0642296a mlx5: Clear reserved area in set_hca_cap()
bf0bf77f6519e5dcd57a77b47e1d151c1e81b7ec mlx5: Support communicating arbitrary host page size to firmware
952f5f6e807ba82e1b82fcfcf7f73db022342aa7 mlx5: Fix cleanup flow when DMA mapping fails
cfd8f1d49b61b20aab77d5af5ec907dc99bb0064 IB/mlx5: Fix srq free in destroy qp
1faacf82dfb3e0027087ff7e6aae5e0643b98a4d IB/mlx5: Simplify mlx5_ib_destroy_srq
9641b74ebee65320fa52172995d6df12b641caa5 IB/mlx5: Fix overflow check in IB_WR_FAST_REG_MR
746b5583c1a48a837f4891adaff5e09d61b204a6 IB/mlx5: Multithreaded create MR
51ee86a4af639e4ee8953dd02ad8a766c40f46a1 IB/mlx5: Fix check of number of entries in create CQ

Signed-off-by: Vladimir Sokolovsky <vlad@mellanox.com>
31 files changed:
linux-next-cherry-picks/0012-IB-mlx5-Fix-check-of-number-of-entries-in-create-CQ.patch [new file with mode: 0644]
linux-next-cherry-picks/0013-IB-mlx5-Multithreaded-create-MR.patch [new file with mode: 0644]
linux-next-cherry-picks/0014-IB-mlx5-Fix-overflow-check-in-IB_WR_FAST_REG_MR.patch [new file with mode: 0644]
linux-next-cherry-picks/0015-IB-mlx5-Simplify-mlx5_ib_destroy_srq.patch [new file with mode: 0644]
linux-next-cherry-picks/0016-IB-mlx5-Fix-srq-free-in-destroy-qp.patch [new file with mode: 0644]
linux-next-cherry-picks/0017-mlx5-Fix-cleanup-flow-when-DMA-mapping-fails.patch [new file with mode: 0644]
linux-next-cherry-picks/0018-mlx5-Support-communicating-arbitrary-host-page-size-.patch [new file with mode: 0644]
linux-next-cherry-picks/0019-mlx5-Clear-reserved-area-in-set_hca_cap.patch [new file with mode: 0644]
linux-next-cherry-picks/0020-mlx5_core-Change-optimal_reclaimed_pages-for-better-.patch [new file with mode: 0644]
linux-next-cherry-picks/0021-IB-mlx5-Remove-dead-code-in-mr.c.patch [new file with mode: 0644]
linux-next-cherry-picks/0022-IB-mlx5-Remove-Always-false-comparison.patch [new file with mode: 0644]
linux-next-cherry-picks/0023-IB-mlx5-Update-opt-param-mask-for-RTS2RTS.patch [new file with mode: 0644]
linux-next-cherry-picks/0024-mlx5-Use-enum-to-indicate-adapter-page-size.patch [new file with mode: 0644]
linux-next-cherry-picks/0025-IB-mlx5-Remove-dead-code.patch [new file with mode: 0644]
linux-next-cherry-picks/0026-IB-mlx5-Fix-list_del-of-empty-list.patch [new file with mode: 0644]
linux-next-cherry-picks/0027-IB-mlx5-Fix-page-shift-in-create-CQ-for-userspace.patch [new file with mode: 0644]
linux-next-cherry-picks/0028-IB-mlx5-Remove-unused-code-in-mr.c.patch [new file with mode: 0644]
linux-next-cherry-picks/0029-mlx5_core-Remove-dead-code.patch [new file with mode: 0644]
linux-next-cherry-picks/0030-IB-mlx5-Fix-micro-UAR-allocator.patch [new file with mode: 0644]
linux-next-cherry-picks/0031-mlx5_core-Fix-out-arg-size-in-access_register-comman.patch [new file with mode: 0644]
linux-next-cherry-picks/0032-IB-mlx5-Clear-out-struct-before-create-QP-command.patch [new file with mode: 0644]
linux-next-cherry-picks/0033-mlx5_core-Use-mlx5-core-style-warning.patch [new file with mode: 0644]
linux-next-cherry-picks/0034-IB-mlx5-Make-sure-doorbell-record-is-visible-before-.patch [new file with mode: 0644]
linux-next-cherry-picks/0035-IB-mlx5-Implement-modify-CQ.patch [new file with mode: 0644]
linux-next-cherry-picks/0036-IB-mlx5-Add-support-for-resize-CQ.patch [new file with mode: 0644]
linux-next-cherry-picks/0037-mlx5_core-Improve-debugfs-readability.patch [new file with mode: 0644]
linux-next-cherry-picks/0038-mlx5_core-Fix-PowerPC-support.patch [new file with mode: 0644]
linux-next-cherry-picks/0039-IB-mlx5-Allow-creation-of-QPs-with-zero-length-work-.patch [new file with mode: 0644]
linux-next-cherry-picks/0040-IB-mlx5-Abort-driver-cleanup-if-teardown-hca-fails.patch [new file with mode: 0644]
linux-next-cherry-picks/0041-IB-mlx5-Remove-old-field-for-create-mkey-mailbox.patch [new file with mode: 0644]
linux-next-cherry-picks/0042-IB-mlx5-Verify-reserved-fields-are-cleared.patch [new file with mode: 0644]

diff --git a/linux-next-cherry-picks/0012-IB-mlx5-Fix-check-of-number-of-entries-in-create-CQ.patch b/linux-next-cherry-picks/0012-IB-mlx5-Fix-check-of-number-of-entries-in-create-CQ.patch
new file mode 100644 (file)
index 0000000..a3dc6fa
--- /dev/null
@@ -0,0 +1,28 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Fix check of number of entries in create CQ
+
+Verify that the value is non negative before rounding up to power of 2.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/cq.c |    5 ++++-
+ 1 files changed, 4 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/cq.c
++++ b/drivers/infiniband/hw/mlx5/cq.c
+@@ -653,8 +653,11 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
+       int eqn;
+       int err;
++      if (entries < 0)
++              return ERR_PTR(-EINVAL);
++
+       entries = roundup_pow_of_two(entries + 1);
+-      if (entries < 1 || entries > dev->mdev.caps.max_cqes)
++      if (entries > dev->mdev.caps.max_cqes)
+               return ERR_PTR(-EINVAL);
+       cq = kzalloc(sizeof(*cq), GFP_KERNEL);
diff --git a/linux-next-cherry-picks/0013-IB-mlx5-Multithreaded-create-MR.patch b/linux-next-cherry-picks/0013-IB-mlx5-Multithreaded-create-MR.patch
new file mode 100644 (file)
index 0000000..9f58cb6
--- /dev/null
@@ -0,0 +1,831 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Multithreaded create MR
+
+Use asynchronous commands to execute up to eight concurrent create MR
+commands. This is to fill memory caches faster so we keep consuming
+from there.  Also, increase timeout for shrinking caches to five
+minutes.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/main.c                 |    3 +-
+ drivers/infiniband/hw/mlx5/mlx5_ib.h              |    6 +
+ drivers/infiniband/hw/mlx5/mr.c                   |  163 ++++++++++++++++-----
+ drivers/infiniband/hw/mlx5/qp.c                   |    4 +-
+ drivers/net/ethernet/mellanox/mlx5/core/cmd.c     |  106 ++++++++++----
+ drivers/net/ethernet/mellanox/mlx5/core/debugfs.c |    8 +-
+ drivers/net/ethernet/mellanox/mlx5/core/mr.c      |   32 +++--
+ include/linux/mlx5/driver.h                       |   17 ++-
+ 8 files changed, 255 insertions(+), 84 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/main.c
++++ b/drivers/infiniband/hw/mlx5/main.c
+@@ -745,7 +745,8 @@ static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
+       seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+       seg->start_addr = 0;
+-      err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in));
++      err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in),
++                                  NULL, NULL, NULL);
+       if (err) {
+               mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
+               goto err_in;
+diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+@@ -262,6 +262,9 @@ struct mlx5_ib_mr {
+       int                     npages;
+       struct completion       done;
+       enum ib_wc_status       status;
++      struct mlx5_ib_dev     *dev;
++      struct mlx5_create_mkey_mbox_out out;
++      unsigned long           start;
+ };
+ struct mlx5_ib_fast_reg_page_list {
+@@ -323,6 +326,7 @@ struct mlx5_cache_ent {
+       struct mlx5_ib_dev     *dev;
+       struct work_struct      work;
+       struct delayed_work     dwork;
++      int                     pending;
+ };
+ struct mlx5_mr_cache {
+@@ -358,6 +362,8 @@ struct mlx5_ib_dev {
+       spinlock_t                      mr_lock;
+       struct mlx5_ib_resources        devr;
+       struct mlx5_mr_cache            cache;
++      struct timer_list               delay_timer;
++      int                             fill_delay;
+ };
+ static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
+diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -35,11 +35,13 @@
+ #include <linux/random.h>
+ #include <linux/debugfs.h>
+ #include <linux/export.h>
++#include <linux/delay.h>
+ #include <rdma/ib_umem.h>
+ #include "mlx5_ib.h"
+ enum {
+       DEF_CACHE_SIZE  = 10,
++      MAX_PENDING_REG_MR = 8,
+ };
+ enum {
+@@ -63,6 +65,57 @@ static int order2idx(struct mlx5_ib_dev *dev, int order)
+               return order - cache->ent[0].order;
+ }
++static void reg_mr_callback(int status, void *context)
++{
++      struct mlx5_ib_mr *mr = context;
++      struct mlx5_ib_dev *dev = mr->dev;
++      struct mlx5_mr_cache *cache = &dev->cache;
++      int c = order2idx(dev, mr->order);
++      struct mlx5_cache_ent *ent = &cache->ent[c];
++      u8 key;
++      unsigned long delta = jiffies - mr->start;
++      unsigned long index;
++      unsigned long flags;
++
++      index = find_last_bit(&delta, 8 * sizeof(delta));
++      if (index == 64)
++              index = 0;
++
++      spin_lock_irqsave(&ent->lock, flags);
++      ent->pending--;
++      spin_unlock_irqrestore(&ent->lock, flags);
++      if (status) {
++              mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
++              kfree(mr);
++              dev->fill_delay = 1;
++              mod_timer(&dev->delay_timer, jiffies + HZ);
++              return;
++      }
++
++      if (mr->out.hdr.status) {
++              mlx5_ib_warn(dev, "failed - status %d, syndorme 0x%x\n",
++                           mr->out.hdr.status,
++                           be32_to_cpu(mr->out.hdr.syndrome));
++              kfree(mr);
++              dev->fill_delay = 1;
++              mod_timer(&dev->delay_timer, jiffies + HZ);
++              return;
++      }
++
++      spin_lock_irqsave(&dev->mdev.priv.mkey_lock, flags);
++      key = dev->mdev.priv.mkey_key++;
++      spin_unlock_irqrestore(&dev->mdev.priv.mkey_lock, flags);
++      mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
++
++      cache->last_add = jiffies;
++
++      spin_lock_irqsave(&ent->lock, flags);
++      list_add_tail(&mr->list, &ent->head);
++      ent->cur++;
++      ent->size++;
++      spin_unlock_irqrestore(&ent->lock, flags);
++}
++
+ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
+ {
+       struct mlx5_mr_cache *cache = &dev->cache;
+@@ -78,36 +131,39 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
+               return -ENOMEM;
+       for (i = 0; i < num; i++) {
++              if (ent->pending >= MAX_PENDING_REG_MR) {
++                      err = -EAGAIN;
++                      break;
++              }
++
+               mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+               if (!mr) {
+                       err = -ENOMEM;
+-                      goto out;
++                      break;
+               }
+               mr->order = ent->order;
+               mr->umred = 1;
++              mr->dev = dev;
+               in->seg.status = 1 << 6;
+               in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
+               in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+               in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
+               in->seg.log2_page_size = 12;
++              spin_lock_irq(&ent->lock);
++              ent->pending++;
++              spin_unlock_irq(&ent->lock);
++              mr->start = jiffies;
+               err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
+-                                          sizeof(*in));
++                                          sizeof(*in), reg_mr_callback,
++                                          mr, &mr->out);
+               if (err) {
+                       mlx5_ib_warn(dev, "create mkey failed %d\n", err);
+                       kfree(mr);
+-                      goto out;
++                      break;
+               }
+-              cache->last_add = jiffies;
+-
+-              spin_lock(&ent->lock);
+-              list_add_tail(&mr->list, &ent->head);
+-              ent->cur++;
+-              ent->size++;
+-              spin_unlock(&ent->lock);
+       }
+-out:
+       kfree(in);
+       return err;
+ }
+@@ -121,16 +177,16 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
+       int i;
+       for (i = 0; i < num; i++) {
+-              spin_lock(&ent->lock);
++              spin_lock_irq(&ent->lock);
+               if (list_empty(&ent->head)) {
+-                      spin_unlock(&ent->lock);
++                      spin_unlock_irq(&ent->lock);
+                       return;
+               }
+               mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+               list_del(&mr->list);
+               ent->cur--;
+               ent->size--;
+-              spin_unlock(&ent->lock);
++              spin_unlock_irq(&ent->lock);
+               err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
+               if (err)
+                       mlx5_ib_warn(dev, "failed destroy mkey\n");
+@@ -162,9 +218,13 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
+               return -EINVAL;
+       if (var > ent->size) {
+-              err = add_keys(dev, c, var - ent->size);
+-              if (err)
+-                      return err;
++              do {
++                      err = add_keys(dev, c, var - ent->size);
++                      if (err && err != -EAGAIN)
++                              return err;
++
++                      usleep_range(3000, 5000);
++              } while (err);
+       } else if (var < ent->size) {
+               remove_keys(dev, c, ent->size - var);
+       }
+@@ -280,23 +340,37 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
+       struct mlx5_ib_dev *dev = ent->dev;
+       struct mlx5_mr_cache *cache = &dev->cache;
+       int i = order2idx(dev, ent->order);
++      int err;
+       if (cache->stopped)
+               return;
+       ent = &dev->cache.ent[i];
+-      if (ent->cur < 2 * ent->limit) {
+-              add_keys(dev, i, 1);
+-              if (ent->cur < 2 * ent->limit)
+-                      queue_work(cache->wq, &ent->work);
++      if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
++              err = add_keys(dev, i, 1);
++              if (ent->cur < 2 * ent->limit) {
++                      if (err == -EAGAIN) {
++                              mlx5_ib_dbg(dev, "returned eagain, order %d\n",
++                                          i + 2);
++                              queue_delayed_work(cache->wq, &ent->dwork,
++                                                 msecs_to_jiffies(3));
++                      } else if (err) {
++                              mlx5_ib_warn(dev, "command failed order %d, err %d\n",
++                                           i + 2, err);
++                              queue_delayed_work(cache->wq, &ent->dwork,
++                                                 msecs_to_jiffies(1000));
++                      } else {
++                              queue_work(cache->wq, &ent->work);
++                      }
++              }
+       } else if (ent->cur > 2 * ent->limit) {
+               if (!someone_adding(cache) &&
+-                  time_after(jiffies, cache->last_add + 60 * HZ)) {
++                  time_after(jiffies, cache->last_add + 300 * HZ)) {
+                       remove_keys(dev, i, 1);
+                       if (ent->cur > ent->limit)
+                               queue_work(cache->wq, &ent->work);
+               } else {
+-                      queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ);
++                      queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
+               }
+       }
+ }
+@@ -336,18 +410,18 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
+               mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
+-              spin_lock(&ent->lock);
++              spin_lock_irq(&ent->lock);
+               if (!list_empty(&ent->head)) {
+                       mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
+                                             list);
+                       list_del(&mr->list);
+                       ent->cur--;
+-                      spin_unlock(&ent->lock);
++                      spin_unlock_irq(&ent->lock);
+                       if (ent->cur < ent->limit)
+                               queue_work(cache->wq, &ent->work);
+                       break;
+               }
+-              spin_unlock(&ent->lock);
++              spin_unlock_irq(&ent->lock);
+               queue_work(cache->wq, &ent->work);
+@@ -374,12 +448,12 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+               return;
+       }
+       ent = &cache->ent[c];
+-      spin_lock(&ent->lock);
++      spin_lock_irq(&ent->lock);
+       list_add_tail(&mr->list, &ent->head);
+       ent->cur++;
+       if (ent->cur > 2 * ent->limit)
+               shrink = 1;
+-      spin_unlock(&ent->lock);
++      spin_unlock_irq(&ent->lock);
+       if (shrink)
+               queue_work(cache->wq, &ent->work);
+@@ -394,16 +468,16 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
+       cancel_delayed_work(&ent->dwork);
+       while (1) {
+-              spin_lock(&ent->lock);
++              spin_lock_irq(&ent->lock);
+               if (list_empty(&ent->head)) {
+-                      spin_unlock(&ent->lock);
++                      spin_unlock_irq(&ent->lock);
+                       return;
+               }
+               mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+               list_del(&mr->list);
+               ent->cur--;
+               ent->size--;
+-              spin_unlock(&ent->lock);
++              spin_unlock_irq(&ent->lock);
+               err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
+               if (err)
+                       mlx5_ib_warn(dev, "failed destroy mkey\n");
+@@ -464,6 +538,13 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
+       debugfs_remove_recursive(dev->cache.root);
+ }
++static void delay_time_func(unsigned long ctx)
++{
++      struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
++
++      dev->fill_delay = 0;
++}
++
+ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
+ {
+       struct mlx5_mr_cache *cache = &dev->cache;
+@@ -479,6 +560,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
+               return -ENOMEM;
+       }
++      setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
+       for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+               INIT_LIST_HEAD(&cache->ent[i].head);
+               spin_lock_init(&cache->ent[i].lock);
+@@ -522,6 +604,7 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
+               clean_keys(dev, i);
+       destroy_workqueue(dev->cache.wq);
++      del_timer_sync(&dev->delay_timer);
+       return 0;
+ }
+@@ -551,7 +634,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
+       seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+       seg->start_addr = 0;
+-      err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in));
++      err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
++                                  NULL);
+       if (err)
+               goto err_in;
+@@ -660,14 +744,14 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
+       int err;
+       int i;
+-      for (i = 0; i < 10; i++) {
++      for (i = 0; i < 1; i++) {
+               mr = alloc_cached_mr(dev, order);
+               if (mr)
+                       break;
+               err = add_keys(dev, order2idx(dev, order), 1);
+-              if (err) {
+-                      mlx5_ib_warn(dev, "add_keys failed\n");
++              if (err && err != -EAGAIN) {
++                      mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
+                       break;
+               }
+       }
+@@ -759,8 +843,10 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
+       in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
+       in->seg.log2_page_size = page_shift;
+       in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+-      in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
+-      err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen);
++      in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
++                                                       1 << page_shift));
++      err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen, NULL,
++                                  NULL, NULL);
+       if (err) {
+               mlx5_ib_warn(dev, "create mkey failed\n");
+               goto err_2;
+@@ -944,7 +1030,8 @@ struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+        * TBD not needed - issue 197292 */
+       in->seg.log2_page_size = PAGE_SHIFT;
+-      err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in));
++      err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
++                                  NULL, NULL);
+       kfree(in);
+       if (err)
+               goto err_free;
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -1744,6 +1744,7 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
+                       MLX5_MKEY_MASK_PD               |
+                       MLX5_MKEY_MASK_LR               |
+                       MLX5_MKEY_MASK_LW               |
++                      MLX5_MKEY_MASK_KEY              |
+                       MLX5_MKEY_MASK_RR               |
+                       MLX5_MKEY_MASK_RW               |
+                       MLX5_MKEY_MASK_A                |
+@@ -1800,7 +1801,8 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w
+       seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
+       seg->len = cpu_to_be64(wr->wr.fast_reg.length);
+       seg->log2_page_size = wr->wr.fast_reg.page_shift;
+-      seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
++      seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
++                                     mlx5_mkey_variant(wr->wr.fast_reg.rkey));
+ }
+ static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+@@ -98,6 +98,7 @@ enum {
+ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
+                                          struct mlx5_cmd_msg *in,
+                                          struct mlx5_cmd_msg *out,
++                                         void *uout, int uout_size,
+                                          mlx5_cmd_cbk_t cbk,
+                                          void *context, int page_queue)
+ {
+@@ -110,6 +111,8 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
+       ent->in         = in;
+       ent->out        = out;
++      ent->uout       = uout;
++      ent->uout_size  = uout_size;
+       ent->callback   = cbk;
+       ent->context    = context;
+       ent->cmd        = cmd;
+@@ -534,6 +537,7 @@ static void cmd_work_handler(struct work_struct *work)
+       ent->lay = lay;
+       memset(lay, 0, sizeof(*lay));
+       memcpy(lay->in, ent->in->first.data, sizeof(lay->in));
++      ent->op = be32_to_cpu(lay->in[0]) >> 16;
+       if (ent->in->next)
+               lay->in_ptr = cpu_to_be64(ent->in->next->dma);
+       lay->inlen = cpu_to_be32(ent->in->len);
+@@ -628,7 +632,8 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
+  *    2. page queue commands do not support asynchrous completion
+  */
+ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
+-                         struct mlx5_cmd_msg *out, mlx5_cmd_cbk_t callback,
++                         struct mlx5_cmd_msg *out, void *uout, int uout_size,
++                         mlx5_cmd_cbk_t callback,
+                          void *context, int page_queue, u8 *status)
+ {
+       struct mlx5_cmd *cmd = &dev->cmd;
+@@ -642,7 +647,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
+       if (callback && page_queue)
+               return -EINVAL;
+-      ent = alloc_cmd(cmd, in, out, callback, context, page_queue);
++      ent = alloc_cmd(cmd, in, out, uout, uout_size, callback, context,
++                      page_queue);
+       if (IS_ERR(ent))
+               return PTR_ERR(ent);
+@@ -670,10 +676,10 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
+               op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
+               if (op < ARRAY_SIZE(cmd->stats)) {
+                       stats = &cmd->stats[op];
+-                      spin_lock(&stats->lock);
++                      spin_lock_irq(&stats->lock);
+                       stats->sum += ds;
+                       ++stats->n;
+-                      spin_unlock(&stats->lock);
++                      spin_unlock_irq(&stats->lock);
+               }
+               mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
+                                  "fw exec time for %s is %lld nsec\n",
+@@ -826,7 +832,7 @@ static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
+       int n;
+       int i;
+-      msg = kzalloc(sizeof(*msg), GFP_KERNEL);
++      msg = kzalloc(sizeof(*msg), flags);
+       if (!msg)
+               return ERR_PTR(-ENOMEM);
+@@ -1109,6 +1115,19 @@ void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
+               up(&cmd->sem);
+ }
++static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
++{
++      unsigned long flags;
++
++      if (msg->cache) {
++              spin_lock_irqsave(&msg->cache->lock, flags);
++              list_add_tail(&msg->list, &msg->cache->head);
++              spin_unlock_irqrestore(&msg->cache->lock, flags);
++      } else {
++              mlx5_free_cmd_msg(dev, msg);
++      }
++}
++
+ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
+ {
+       struct mlx5_cmd *cmd = &dev->cmd;
+@@ -1117,6 +1136,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
+       void *context;
+       int err;
+       int i;
++      ktime_t t1, t2, delta;
++      s64 ds;
++      struct mlx5_cmd_stats *stats;
++      unsigned long flags;
+       for (i = 0; i < (1 << cmd->log_sz); i++) {
+               if (test_bit(i, &vector)) {
+@@ -1141,9 +1164,29 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
+                       }
+                       free_ent(cmd, ent->idx);
+                       if (ent->callback) {
++                              t1 = timespec_to_ktime(ent->ts1);
++                              t2 = timespec_to_ktime(ent->ts2);
++                              delta = ktime_sub(t2, t1);
++                              ds = ktime_to_ns(delta);
++                              if (ent->op < ARRAY_SIZE(cmd->stats)) {
++                                      stats = &cmd->stats[ent->op];
++                                      spin_lock_irqsave(&stats->lock, flags);
++                                      stats->sum += ds;
++                                      ++stats->n;
++                                      spin_unlock_irqrestore(&stats->lock, flags);
++                              }
++
+                               callback = ent->callback;
+                               context = ent->context;
+                               err = ent->ret;
++                              if (!err)
++                                      err = mlx5_copy_from_msg(ent->uout,
++                                                               ent->out,
++                                                               ent->uout_size);
++
++                              mlx5_free_cmd_msg(dev, ent->out);
++                              free_msg(dev, ent->in);
++
+                               free_cmd(ent);
+                               callback(err, context);
+                       } else {
+@@ -1160,7 +1203,8 @@ static int status_to_err(u8 status)
+       return status ? -1 : 0; /* TBD more meaningful codes */
+ }
+-static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size)
++static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
++                                    gfp_t gfp)
+ {
+       struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM);
+       struct mlx5_cmd *cmd = &dev->cmd;
+@@ -1172,7 +1216,7 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size)
+               ent = &cmd->cache.med;
+       if (ent) {
+-              spin_lock(&ent->lock);
++              spin_lock_irq(&ent->lock);
+               if (!list_empty(&ent->head)) {
+                       msg = list_entry(ent->head.next, typeof(*msg), list);
+                       /* For cached lists, we must explicitly state what is
+@@ -1181,43 +1225,34 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size)
+                       msg->len = in_size;
+                       list_del(&msg->list);
+               }
+-              spin_unlock(&ent->lock);
++              spin_unlock_irq(&ent->lock);
+       }
+       if (IS_ERR(msg))
+-              msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, in_size);
++              msg = mlx5_alloc_cmd_msg(dev, gfp, in_size);
+       return msg;
+ }
+-static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
+-{
+-      if (msg->cache) {
+-              spin_lock(&msg->cache->lock);
+-              list_add_tail(&msg->list, &msg->cache->head);
+-              spin_unlock(&msg->cache->lock);
+-      } else {
+-              mlx5_free_cmd_msg(dev, msg);
+-      }
+-}
+-
+ static int is_manage_pages(struct mlx5_inbox_hdr *in)
+ {
+       return be16_to_cpu(in->opcode) == MLX5_CMD_OP_MANAGE_PAGES;
+ }
+-int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+-                int out_size)
++static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
++                  int out_size, mlx5_cmd_cbk_t callback, void *context)
+ {
+       struct mlx5_cmd_msg *inb;
+       struct mlx5_cmd_msg *outb;
+       int pages_queue;
++      gfp_t gfp;
+       int err;
+       u8 status = 0;
+       pages_queue = is_manage_pages(in);
++      gfp = callback ? GFP_ATOMIC : GFP_KERNEL;
+-      inb = alloc_msg(dev, in_size);
++      inb = alloc_msg(dev, in_size, gfp);
+       if (IS_ERR(inb)) {
+               err = PTR_ERR(inb);
+               return err;
+@@ -1229,13 +1264,14 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+               goto out_in;
+       }
+-      outb = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, out_size);
++      outb = mlx5_alloc_cmd_msg(dev, gfp, out_size);
+       if (IS_ERR(outb)) {
+               err = PTR_ERR(outb);
+               goto out_in;
+       }
+-      err = mlx5_cmd_invoke(dev, inb, outb, NULL, NULL, pages_queue, &status);
++      err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
++                            pages_queue, &status);
+       if (err)
+               goto out_out;
+@@ -1248,14 +1284,30 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+       err = mlx5_copy_from_msg(out, outb, out_size);
+ out_out:
+-      mlx5_free_cmd_msg(dev, outb);
++      if (!callback)
++              mlx5_free_cmd_msg(dev, outb);
+ out_in:
+-      free_msg(dev, inb);
++      if (!callback)
++              free_msg(dev, inb);
+       return err;
+ }
++
++int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
++                int out_size)
++{
++      return cmd_exec(dev, in, in_size, out, out_size, NULL, NULL);
++}
+ EXPORT_SYMBOL(mlx5_cmd_exec);
++int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
++                   void *out, int out_size, mlx5_cmd_cbk_t callback,
++                   void *context)
++{
++      return cmd_exec(dev, in, in_size, out, out_size, callback, context);
++}
++EXPORT_SYMBOL(mlx5_cmd_exec_cb);
++
+ static void destroy_msg_cache(struct mlx5_core_dev *dev)
+ {
+       struct mlx5_cmd *cmd = &dev->cmd;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+@@ -154,10 +154,10 @@ static ssize_t average_read(struct file *filp, char __user *buf, size_t count,
+               return 0;
+       stats = filp->private_data;
+-      spin_lock(&stats->lock);
++      spin_lock_irq(&stats->lock);
+       if (stats->n)
+               field = div64_u64(stats->sum, stats->n);
+-      spin_unlock(&stats->lock);
++      spin_unlock_irq(&stats->lock);
+       ret = snprintf(tbuf, sizeof(tbuf), "%llu\n", field);
+       if (ret > 0) {
+               if (copy_to_user(buf, tbuf, ret))
+@@ -175,10 +175,10 @@ static ssize_t average_write(struct file *filp, const char __user *buf,
+       struct mlx5_cmd_stats *stats;
+       stats = filp->private_data;
+-      spin_lock(&stats->lock);
++      spin_lock_irq(&stats->lock);
+       stats->sum = 0;
+       stats->n = 0;
+-      spin_unlock(&stats->lock);
++      spin_unlock_irq(&stats->lock);
+       *pos += count;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+@@ -37,31 +37,41 @@
+ #include "mlx5_core.h"
+ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
+-                        struct mlx5_create_mkey_mbox_in *in, int inlen)
++                        struct mlx5_create_mkey_mbox_in *in, int inlen,
++                        mlx5_cmd_cbk_t callback, void *context,
++                        struct mlx5_create_mkey_mbox_out *out)
+ {
+-      struct mlx5_create_mkey_mbox_out out;
++      struct mlx5_create_mkey_mbox_out lout;
+       int err;
+       u8 key;
+-      memset(&out, 0, sizeof(out));
+-      spin_lock(&dev->priv.mkey_lock);
++      memset(&lout, 0, sizeof(lout));
++      spin_lock_irq(&dev->priv.mkey_lock);
+       key = dev->priv.mkey_key++;
+-      spin_unlock(&dev->priv.mkey_lock);
++      spin_unlock_irq(&dev->priv.mkey_lock);
+       in->seg.qpn_mkey7_0 |= cpu_to_be32(key);
+       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_MKEY);
+-      err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
++      if (callback) {
++              err = mlx5_cmd_exec_cb(dev, in, inlen, out, sizeof(*out),
++                                     callback, context);
++              return err;
++      } else {
++              err = mlx5_cmd_exec(dev, in, inlen, &lout, sizeof(lout));
++      }
++
+       if (err) {
+               mlx5_core_dbg(dev, "cmd exec faile %d\n", err);
+               return err;
+       }
+-      if (out.hdr.status) {
+-              mlx5_core_dbg(dev, "status %d\n", out.hdr.status);
+-              return mlx5_cmd_status_to_err(&out.hdr);
++      if (lout.hdr.status) {
++              mlx5_core_dbg(dev, "status %d\n", lout.hdr.status);
++              return mlx5_cmd_status_to_err(&lout.hdr);
+       }
+-      mr->key = mlx5_idx_to_mkey(be32_to_cpu(out.mkey) & 0xffffff) | key;
+-      mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", be32_to_cpu(out.mkey), key, mr->key);
++      mr->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key;
++      mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
++                    be32_to_cpu(lout.mkey), key, mr->key);
+       return err;
+ }
+diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/driver.h
++++ b/include/linux/mlx5/driver.h
+@@ -557,9 +557,11 @@ typedef void (*mlx5_cmd_cbk_t)(int status, void *context);
+ struct mlx5_cmd_work_ent {
+       struct mlx5_cmd_msg    *in;
+       struct mlx5_cmd_msg    *out;
++      void                   *uout;
++      int                     uout_size;
+       mlx5_cmd_cbk_t          callback;
+       void                   *context;
+-      int idx;
++      int                     idx;
+       struct completion       done;
+       struct mlx5_cmd        *cmd;
+       struct work_struct      work;
+@@ -570,6 +572,7 @@ struct mlx5_cmd_work_ent {
+       u8                      token;
+       struct timespec         ts1;
+       struct timespec         ts2;
++      u16                     op;
+ };
+ struct mlx5_pas {
+@@ -653,6 +656,9 @@ void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
+ int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr);
+ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+                 int out_size);
++int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
++                   void *out, int out_size, mlx5_cmd_cbk_t callback,
++                   void *context);
+ int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
+ int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
+ int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
+@@ -676,7 +682,9 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+                     u16 lwm, int is_srq);
+ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
+-                        struct mlx5_create_mkey_mbox_in *in, int inlen);
++                        struct mlx5_create_mkey_mbox_in *in, int inlen,
++                        mlx5_cmd_cbk_t callback, void *context,
++                        struct mlx5_create_mkey_mbox_out *out);
+ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr);
+ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
+                        struct mlx5_query_mkey_mbox_out *out, int outlen);
+@@ -745,6 +753,11 @@ static inline u32 mlx5_idx_to_mkey(u32 mkey_idx)
+       return mkey_idx << 8;
+ }
++static inline u8 mlx5_mkey_variant(u32 mkey)
++{
++      return mkey & 0xff;
++}
++
+ enum {
+       MLX5_PROF_MASK_QP_SIZE          = (u64)1 << 0,
+       MLX5_PROF_MASK_MR_CACHE         = (u64)1 << 1,
diff --git a/linux-next-cherry-picks/0014-IB-mlx5-Fix-overflow-check-in-IB_WR_FAST_REG_MR.patch b/linux-next-cherry-picks/0014-IB-mlx5-Fix-overflow-check-in-IB_WR_FAST_REG_MR.patch
new file mode 100644 (file)
index 0000000..212f157
--- /dev/null
@@ -0,0 +1,27 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Fix overflow check in IB_WR_FAST_REG_MR
+
+Make sure not to overflow when reading the page list from struct
+ib_fast_reg_page_list.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/qp.c |    4 ++++
+ 1 files changed, 4 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -1915,6 +1915,10 @@ static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
+       if (unlikely((*seg == qp->sq.qend)))
+               *seg = mlx5_get_send_wqe(qp, 0);
+       if (!li) {
++              if (unlikely(wr->wr.fast_reg.page_list_len >
++                           wr->wr.fast_reg.page_list->max_page_list_len))
++                      return  -ENOMEM;
++
+               set_frwr_pages(*seg, wr, mdev, pd, writ);
+               *seg += sizeof(struct mlx5_wqe_data_seg);
+               *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
diff --git a/linux-next-cherry-picks/0015-IB-mlx5-Simplify-mlx5_ib_destroy_srq.patch b/linux-next-cherry-picks/0015-IB-mlx5-Simplify-mlx5_ib_destroy_srq.patch
new file mode 100644 (file)
index 0000000..f4dbc44
--- /dev/null
@@ -0,0 +1,26 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Simplify mlx5_ib_destroy_srq
+
+Make use of destroy_srq_kernel() to clear SRQ resources.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/srq.c |    4 +---
+ 1 files changed, 1 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/srq.c
++++ b/drivers/infiniband/hw/mlx5/srq.c
+@@ -390,9 +390,7 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq)
+               mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
+               ib_umem_release(msrq->umem);
+       } else {
+-              kfree(msrq->wrid);
+-              mlx5_buf_free(&dev->mdev, &msrq->buf);
+-              mlx5_db_free(&dev->mdev, &msrq->db);
++              destroy_srq_kernel(dev, msrq);
+       }
+       kfree(srq);
diff --git a/linux-next-cherry-picks/0016-IB-mlx5-Fix-srq-free-in-destroy-qp.patch b/linux-next-cherry-picks/0016-IB-mlx5-Fix-srq-free-in-destroy-qp.patch
new file mode 100644 (file)
index 0000000..9bfbb1a
--- /dev/null
@@ -0,0 +1,52 @@
+From: Moshe Lazer <moshel@mellanox.com>
+Subject: [PATCH] IB/mlx5: Fix srq free in destroy qp
+
+On destroy QP the driver walks over the relevant CQ and removes CQEs
+reported for the destroyed QP.  It also frees the related SRQ entry
+without checking that this is actually an SRQ-related CQE.  In case of
+a CQ used for both send and receive QP, we could free SRQ entries for
+send CQEs.  This patch resolves this issue by verifying that this is a
+SRQ related CQE by checking the SRQ number in the CQE is not zero.
+
+Signed-off-by: Moshe Lazer <moshel@mellanox.com>
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/cq.c |   16 ++++------------
+ 1 files changed, 4 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/cq.c
++++ b/drivers/infiniband/hw/mlx5/cq.c
+@@ -750,17 +750,9 @@ int mlx5_ib_destroy_cq(struct ib_cq *cq)
+       return 0;
+ }
+-static int is_equal_rsn(struct mlx5_cqe64 *cqe64, struct mlx5_ib_srq *srq,
+-                      u32 rsn)
++static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
+ {
+-      u32 lrsn;
+-
+-      if (srq)
+-              lrsn = be32_to_cpu(cqe64->srqn) & 0xffffff;
+-      else
+-              lrsn = be32_to_cpu(cqe64->sop_drop_qpn) & 0xffffff;
+-
+-      return rsn == lrsn;
++      return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
+ }
+ void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
+@@ -790,8 +782,8 @@ void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
+       while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
+               cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
+               cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
+-              if (is_equal_rsn(cqe64, srq, rsn)) {
+-                      if (srq)
++              if (is_equal_rsn(cqe64, rsn)) {
++                      if (srq && (ntohl(cqe64->srqn) & 0xffffff))
+                               mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
+                       ++nfreed;
+               } else if (nfreed) {
diff --git a/linux-next-cherry-picks/0017-mlx5-Fix-cleanup-flow-when-DMA-mapping-fails.patch b/linux-next-cherry-picks/0017-mlx5-Fix-cleanup-flow-when-DMA-mapping-fails.patch
new file mode 100644 (file)
index 0000000..c6b848b
--- /dev/null
@@ -0,0 +1,52 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] mlx5: Fix cleanup flow when DMA mapping fails
+
+If DMA mapping fails, the driver cleared the object that holds the
+previously DMA mapped pages. Fix this by allocating a new object for
+the command that reports back to firmware that pages can't be
+supplied.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ .../net/ethernet/mellanox/mlx5/core/pagealloc.c    |   18 +++++++++++++-----
+ 1 files changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+@@ -181,6 +181,7 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
+ {
+       struct mlx5_manage_pages_inbox *in;
+       struct mlx5_manage_pages_outbox out;
++      struct mlx5_manage_pages_inbox *nin;
+       struct page *page;
+       int inlen;
+       u64 addr;
+@@ -247,13 +248,20 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
+ out_alloc:
+       if (notify_fail) {
+-              memset(in, 0, inlen);
++              nin = kzalloc(sizeof(*nin), GFP_KERNEL);
++              if (!nin) {
++                      mlx5_core_warn(dev, "allocation failed\n");
++                      goto unmap;
++              }
+               memset(&out, 0, sizeof(out));
+-              in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
+-              in->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE);
+-              if (mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out)))
+-                      mlx5_core_warn(dev, "\n");
++              nin->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
++              nin->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE);
++              if (mlx5_cmd_exec(dev, nin, sizeof(*nin), &out, sizeof(out)))
++                      mlx5_core_warn(dev, "page notify failed\n");
++              kfree(nin);
+       }
++
++unmap:
+       for (i--; i >= 0; i--) {
+               addr = be64_to_cpu(in->pas[i]);
+               page = remove_page(dev, addr);
diff --git a/linux-next-cherry-picks/0018-mlx5-Support-communicating-arbitrary-host-page-size-.patch b/linux-next-cherry-picks/0018-mlx5-Support-communicating-arbitrary-host-page-size-.patch
new file mode 100644 (file)
index 0000000..b0884b3
--- /dev/null
@@ -0,0 +1,394 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] mlx5: Support communicating arbitrary host page size to firmware
+
+Connect-IB firmware requires 4K pages to be communicated with the
+driver. This patch breaks larger pages to 4K units to enable support
+for architectures utilizing larger page size, such as PowerPC.  This
+patch also fixes several places that referred to PAGE_SHIFT instead of
+explicit 12 which is the inherent page shift on Connect-IB.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/cq.c                    |    2 +-
+ drivers/infiniband/hw/mlx5/qp.c                    |    4 +-
+ drivers/infiniband/hw/mlx5/srq.c                   |    4 +-
+ drivers/net/ethernet/mellanox/mlx5/core/eq.c       |    2 +-
+ .../net/ethernet/mellanox/mlx5/core/pagealloc.c    |  174 ++++++++++++++------
+ include/linux/mlx5/driver.h                        |    1 +
+ 6 files changed, 127 insertions(+), 60 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/cq.c
++++ b/drivers/infiniband/hw/mlx5/cq.c
+@@ -620,7 +620,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+       }
+       mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
+-      (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT;
++      (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - 12;
+       *index = dev->mdev.priv.uuari.uars[0].index;
+       return 0;
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -551,7 +551,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+       }
+       mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
+       (*in)->ctx.log_pg_sz_remote_qpn =
+-              cpu_to_be32((page_shift - PAGE_SHIFT) << 24);
++              cpu_to_be32((page_shift - 12) << 24);
+       (*in)->ctx.params2 = cpu_to_be32(offset << 6);
+       (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
+@@ -648,7 +648,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
+               goto err_buf;
+       }
+       (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
+-      (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - PAGE_SHIFT) << 24);
++      (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - 12) << 24);
+       /* Set "fast registration enabled" for all kernel QPs */
+       (*in)->ctx.params1 |= cpu_to_be32(1 << 11);
+       (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
+diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/srq.c
++++ b/drivers/infiniband/hw/mlx5/srq.c
+@@ -123,7 +123,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
+               goto err_in;
+       }
+-      (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
++      (*in)->ctx.log_pg_sz = page_shift - 12;
+       (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
+       return 0;
+@@ -192,7 +192,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
+       }
+       srq->wq_sig = !!srq_signature;
+-      (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
++      (*in)->ctx.log_pg_sz = page_shift - 12;
+       return 0;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+@@ -354,7 +354,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
+       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ);
+       in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index);
+       in->ctx.intr = vecidx;
+-      in->ctx.log_page_size = PAGE_SHIFT - 12;
++      in->ctx.log_page_size = eq->buf.page_shift - 12;
+       in->events_mask = cpu_to_be64(mask);
+       err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+@@ -57,10 +57,13 @@ struct mlx5_pages_req {
+ };
+ struct fw_page {
+-      struct rb_node  rb_node;
+-      u64             addr;
+-      struct page     *page;
+-      u16             func_id;
++      struct rb_node          rb_node;
++      u64                     addr;
++      struct page            *page;
++      u16                     func_id;
++      unsigned long           bitmask;
++      struct list_head        list;
++      unsigned                free_count;
+ };
+ struct mlx5_query_pages_inbox {
+@@ -94,6 +97,11 @@ enum {
+       MAX_RECLAIM_TIME_MSECS  = 5000,
+ };
++enum {
++      MLX5_MAX_RECLAIM_TIME_MILI      = 5000,
++      MLX5_NUM_4K_IN_PAGE             = PAGE_SIZE / 4096,
++};
++
+ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
+ {
+       struct rb_root *root = &dev->priv.page_root;
+@@ -101,6 +109,7 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u
+       struct rb_node *parent = NULL;
+       struct fw_page *nfp;
+       struct fw_page *tfp;
++      int i;
+       while (*new) {
+               parent = *new;
+@@ -113,25 +122,29 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u
+                       return -EEXIST;
+       }
+-      nfp = kmalloc(sizeof(*nfp), GFP_KERNEL);
++      nfp = kzalloc(sizeof(*nfp), GFP_KERNEL);
+       if (!nfp)
+               return -ENOMEM;
+       nfp->addr = addr;
+       nfp->page = page;
+       nfp->func_id = func_id;
++      nfp->free_count = MLX5_NUM_4K_IN_PAGE;
++      for (i = 0; i < MLX5_NUM_4K_IN_PAGE; i++)
++              set_bit(i, &nfp->bitmask);
+       rb_link_node(&nfp->rb_node, parent, new);
+       rb_insert_color(&nfp->rb_node, root);
++      list_add(&nfp->list, &dev->priv.free_list);
+       return 0;
+ }
+-static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
++static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr)
+ {
+       struct rb_root *root = &dev->priv.page_root;
+       struct rb_node *tmp = root->rb_node;
+-      struct page *result = NULL;
++      struct fw_page *result = NULL;
+       struct fw_page *tfp;
+       while (tmp) {
+@@ -141,9 +154,7 @@ static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
+               } else if (tfp->addr > addr) {
+                       tmp = tmp->rb_right;
+               } else {
+-                      rb_erase(&tfp->rb_node, root);
+-                      result = tfp->page;
+-                      kfree(tfp);
++                      result = tfp;
+                       break;
+               }
+       }
+@@ -176,13 +187,97 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
+       return err;
+ }
++static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
++{
++      struct fw_page *fp;
++      unsigned n;
++
++      if (list_empty(&dev->priv.free_list)) {
++              return -ENOMEM;
++              mlx5_core_warn(dev, "\n");
++      }
++
++      fp = list_entry(dev->priv.free_list.next, struct fw_page, list);
++      n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
++      if (n >= MLX5_NUM_4K_IN_PAGE) {
++              mlx5_core_warn(dev, "alloc 4k bug\n");
++              return -ENOENT;
++      }
++      clear_bit(n, &fp->bitmask);
++      fp->free_count--;
++      if (!fp->free_count)
++              list_del(&fp->list);
++
++      *addr = fp->addr + n * 4096;
++
++      return 0;
++}
++
++static void free_4k(struct mlx5_core_dev *dev, u64 addr)
++{
++      struct fw_page *fwp;
++      int n;
++
++      fwp = find_fw_page(dev, addr & PAGE_MASK);
++      if (!fwp) {
++              mlx5_core_warn(dev, "page not found\n");
++              return;
++      }
++
++      n = (addr & ~PAGE_MASK) % 4096;
++      fwp->free_count++;
++      set_bit(n, &fwp->bitmask);
++      if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
++              rb_erase(&fwp->rb_node, &dev->priv.page_root);
++              list_del(&fwp->list);
++              dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
++              __free_page(fwp->page);
++              kfree(fwp);
++      } else if (fwp->free_count == 1) {
++              list_add(&fwp->list, &dev->priv.free_list);
++      }
++}
++
++static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id)
++{
++      struct page *page;
++      u64 addr;
++      int err;
++
++      page = alloc_page(GFP_HIGHUSER);
++      if (!page) {
++              mlx5_core_warn(dev, "failed to allocate page\n");
++              return -ENOMEM;
++      }
++      addr = dma_map_page(&dev->pdev->dev, page, 0,
++                          PAGE_SIZE, DMA_BIDIRECTIONAL);
++      if (dma_mapping_error(&dev->pdev->dev, addr)) {
++              mlx5_core_warn(dev, "failed dma mapping page\n");
++              err = -ENOMEM;
++              goto out_alloc;
++      }
++      err = insert_page(dev, addr, page, func_id);
++      if (err) {
++              mlx5_core_err(dev, "failed to track allocated page\n");
++              goto out_mapping;
++      }
++
++      return 0;
++
++out_mapping:
++      dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
++
++out_alloc:
++      __free_page(page);
++
++      return err;
++}
+ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
+                     int notify_fail)
+ {
+       struct mlx5_manage_pages_inbox *in;
+       struct mlx5_manage_pages_outbox out;
+       struct mlx5_manage_pages_inbox *nin;
+-      struct page *page;
+       int inlen;
+       u64 addr;
+       int err;
+@@ -197,27 +292,15 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
+       memset(&out, 0, sizeof(out));
+       for (i = 0; i < npages; i++) {
+-              page = alloc_page(GFP_HIGHUSER);
+-              if (!page) {
+-                      err = -ENOMEM;
+-                      mlx5_core_warn(dev, "failed to allocate page\n");
+-                      goto out_alloc;
+-              }
+-              addr = dma_map_page(&dev->pdev->dev, page, 0,
+-                                  PAGE_SIZE, DMA_BIDIRECTIONAL);
+-              if (dma_mapping_error(&dev->pdev->dev, addr)) {
+-                      mlx5_core_warn(dev, "failed dma mapping page\n");
+-                      __free_page(page);
+-                      err = -ENOMEM;
+-                      goto out_alloc;
+-              }
+-              err = insert_page(dev, addr, page, func_id);
++retry:
++              err = alloc_4k(dev, &addr);
+               if (err) {
+-                      mlx5_core_err(dev, "failed to track allocated page\n");
+-                      dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+-                      __free_page(page);
+-                      err = -ENOMEM;
+-                      goto out_alloc;
++                      if (err == -ENOMEM)
++                              err = alloc_system_page(dev, func_id);
++                      if (err)
++                              goto out_4k;
++
++                      goto retry;
+               }
+               in->pas[i] = cpu_to_be64(addr);
+       }
+@@ -227,7 +310,6 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
+       in->func_id = cpu_to_be16(func_id);
+       in->num_entries = cpu_to_be32(npages);
+       err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+-      mlx5_core_dbg(dev, "err %d\n", err);
+       if (err) {
+               mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err);
+               goto out_alloc;
+@@ -251,7 +333,7 @@ out_alloc:
+               nin = kzalloc(sizeof(*nin), GFP_KERNEL);
+               if (!nin) {
+                       mlx5_core_warn(dev, "allocation failed\n");
+-                      goto unmap;
++                      goto out_4k;
+               }
+               memset(&out, 0, sizeof(out));
+               nin->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
+@@ -261,19 +343,9 @@ out_alloc:
+               kfree(nin);
+       }
+-unmap:
+-      for (i--; i >= 0; i--) {
+-              addr = be64_to_cpu(in->pas[i]);
+-              page = remove_page(dev, addr);
+-              if (!page) {
+-                      mlx5_core_err(dev, "BUG: can't remove page at addr 0x%llx\n",
+-                                    addr);
+-                      continue;
+-              }
+-              dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+-              __free_page(page);
+-      }
+-
++out_4k:
++      for (i--; i >= 0; i--)
++              free_4k(dev, be64_to_cpu(in->pas[i]));
+ out_free:
+       mlx5_vfree(in);
+       return err;
+@@ -284,7 +356,6 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
+ {
+       struct mlx5_manage_pages_inbox   in;
+       struct mlx5_manage_pages_outbox *out;
+-      struct page *page;
+       int num_claimed;
+       int outlen;
+       u64 addr;
+@@ -323,13 +394,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
+       for (i = 0; i < num_claimed; i++) {
+               addr = be64_to_cpu(out->pas[i]);
+-              page = remove_page(dev, addr);
+-              if (!page) {
+-                      mlx5_core_warn(dev, "FW reported unknown DMA address 0x%llx\n", addr);
+-              } else {
+-                      dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+-                      __free_page(page);
+-              }
++              free_4k(dev, addr);
+       }
+ out_free:
+@@ -435,6 +500,7 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
+ void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
+ {
+       dev->priv.page_root = RB_ROOT;
++      INIT_LIST_HEAD(&dev->priv.free_list);
+ }
+ void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
+diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/driver.h
++++ b/include/linux/mlx5/driver.h
+@@ -483,6 +483,7 @@ struct mlx5_priv {
+       struct rb_root          page_root;
+       int                     fw_pages;
+       int                     reg_pages;
++      struct list_head        free_list;
+       struct mlx5_core_health health;
diff --git a/linux-next-cherry-picks/0019-mlx5-Clear-reserved-area-in-set_hca_cap.patch b/linux-next-cherry-picks/0019-mlx5-Clear-reserved-area-in-set_hca_cap.patch
new file mode 100644 (file)
index 0000000..91b75b9
--- /dev/null
@@ -0,0 +1,105 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] mlx5: Clear reserved area in set_hca_cap()
+
+Firmware spec requires reserved fields to be cleared when calling
+set_hca_cap.  Current code queries and copy to the set area, possibly
+resulting in reserved bits not cleared. This patch copies only
+writable fields to the set area.
+
+Fix also typo - msx => max
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c |   35 +++++++++++++++++++++--
+ include/linux/mlx5/device.h                    |    9 +++++-
+ 2 files changed, 39 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -159,6 +159,36 @@ struct mlx5_reg_host_endianess {
+       u8      rsvd[15];
+ };
++
++#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))
++
++enum {
++      MLX5_CAP_BITS_RW_MASK   = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
++                                CAP_MASK(MLX5_CAP_OFF_DCT, 1),
++};
++
++/* selectively copy writable fields clearing any reserved area
++ */
++static void copy_rw_fields(struct mlx5_hca_cap *to, struct mlx5_hca_cap *from)
++{
++      u64 v64;
++
++      to->log_max_qp = from->log_max_qp & 0x1f;
++      to->log_max_ra_req_dc = from->log_max_ra_req_dc & 0x3f;
++      to->log_max_ra_res_dc = from->log_max_ra_res_dc & 0x3f;
++      to->log_max_ra_req_qp = from->log_max_ra_req_qp & 0x3f;
++      to->log_max_ra_res_qp = from->log_max_ra_res_qp & 0x3f;
++      to->log_max_atomic_size_qp = from->log_max_atomic_size_qp;
++      to->log_max_atomic_size_dc = from->log_max_atomic_size_dc;
++      v64 = be64_to_cpu(from->flags) & MLX5_CAP_BITS_RW_MASK;
++      to->flags = cpu_to_be64(v64);
++}
++
++enum {
++      HCA_CAP_OPMOD_GET_MAX   = 0,
++      HCA_CAP_OPMOD_GET_CUR   = 1,
++};
++
+ static int handle_hca_cap(struct mlx5_core_dev *dev)
+ {
+       struct mlx5_cmd_query_hca_cap_mbox_out *query_out = NULL;
+@@ -180,7 +210,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
+       }
+       query_ctx.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP);
+-      query_ctx.hdr.opmod  = cpu_to_be16(0x1);
++      query_ctx.hdr.opmod  = cpu_to_be16(HCA_CAP_OPMOD_GET_CUR);
+       err = mlx5_cmd_exec(dev, &query_ctx, sizeof(query_ctx),
+                                query_out, sizeof(*query_out));
+       if (err)
+@@ -192,8 +222,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
+               goto query_ex;
+       }
+-      memcpy(&set_ctx->hca_cap, &query_out->hca_cap,
+-             sizeof(set_ctx->hca_cap));
++      copy_rw_fields(&set_ctx->hca_cap, &query_out->hca_cap);
+       if (dev->profile->mask & MLX5_PROF_MASK_QP_SIZE)
+               set_ctx->hca_cap.log_max_qp = dev->profile->log_max_qp;
+diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/device.h
++++ b/include/linux/mlx5/device.h
+@@ -230,6 +230,11 @@ enum {
+       MLX5_MAX_PAGE_SHIFT             = 31
+ };
++enum {
++      MLX5_CAP_OFF_DCT                = 41,
++      MLX5_CAP_OFF_CMDIF_CSUM         = 46,
++};
++
+ struct mlx5_inbox_hdr {
+       __be16          opcode;
+       u8              rsvd[4];
+@@ -319,9 +324,9 @@ struct mlx5_hca_cap {
+       u8      rsvd25[42];
+       __be16  log_uar_page_sz;
+       u8      rsvd26[28];
+-      u8      log_msx_atomic_size_qp;
++      u8      log_max_atomic_size_qp;
+       u8      rsvd27[2];
+-      u8      log_msx_atomic_size_dc;
++      u8      log_max_atomic_size_dc;
+       u8      rsvd28[76];
+ };
diff --git a/linux-next-cherry-picks/0020-mlx5_core-Change-optimal_reclaimed_pages-for-better-.patch b/linux-next-cherry-picks/0020-mlx5_core-Change-optimal_reclaimed_pages-for-better-.patch
new file mode 100644 (file)
index 0000000..a646ba1
--- /dev/null
@@ -0,0 +1,41 @@
+From: Moshe Lazer <moshel@mellanox.com>
+Subject: [PATCH] mlx5_core: Change optimal_reclaimed_pages for better performance
+
+Change optimal_reclaimed_pages() to increase the output size of each
+reclaim pages command. This change reduces significantly the amount of
+reclaim pages commands issued to FW when the driver is unloaded which
+reduces the overall driver unload time.
+
+Signed-off-by: Moshe Lazer <moshel@mellanox.com>
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ .../net/ethernet/mellanox/mlx5/core/pagealloc.c    |    9 +++++++--
+ 1 files changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+@@ -454,14 +454,19 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
+       return give_pages(dev, func_id, npages, 0);
+ }
++enum {
++      MLX5_BLKS_FOR_RECLAIM_PAGES = 12
++};
++
+ static int optimal_reclaimed_pages(void)
+ {
+       struct mlx5_cmd_prot_block *block;
+       struct mlx5_cmd_layout *lay;
+       int ret;
+-      ret = (sizeof(lay->in) + sizeof(block->data) -
+-             sizeof(struct mlx5_manage_pages_outbox)) / 8;
++      ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) -
++             sizeof(struct mlx5_manage_pages_outbox)) /
++             FIELD_SIZEOF(struct mlx5_manage_pages_outbox, pas[0]);
+       return ret;
+ }
diff --git a/linux-next-cherry-picks/0021-IB-mlx5-Remove-dead-code-in-mr.c.patch b/linux-next-cherry-picks/0021-IB-mlx5-Remove-dead-code-in-mr.c.patch
new file mode 100644 (file)
index 0000000..acdef91
--- /dev/null
@@ -0,0 +1,49 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Remove dead code in mr.c
+
+In mlx5_mr_cache_init() the size variable is not used so remove it to
+avoid compiler warnings when running with make W=1.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/mr.c |   10 +++-------
+ 1 files changed, 3 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -40,7 +40,6 @@
+ #include "mlx5_ib.h"
+ enum {
+-      DEF_CACHE_SIZE  = 10,
+       MAX_PENDING_REG_MR = 8,
+ };
+@@ -550,7 +549,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
+       struct mlx5_mr_cache *cache = &dev->cache;
+       struct mlx5_cache_ent *ent;
+       int limit;
+-      int size;
+       int err;
+       int i;
+@@ -571,13 +569,11 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
+               ent->order = i + 2;
+               ent->dev = dev;
+-              if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) {
+-                      size = dev->mdev.profile->mr_cache[i].size;
++              if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE)
+                       limit = dev->mdev.profile->mr_cache[i].limit;
+-              } else {
+-                      size = DEF_CACHE_SIZE;
++              else
+                       limit = 0;
+-              }
++
+               INIT_WORK(&ent->work, cache_work_func);
+               INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
+               ent->limit = limit;
diff --git a/linux-next-cherry-picks/0022-IB-mlx5-Remove-Always-false-comparison.patch b/linux-next-cherry-picks/0022-IB-mlx5-Remove-Always-false-comparison.patch
new file mode 100644 (file)
index 0000000..4c92c81
--- /dev/null
@@ -0,0 +1,25 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Remove "Always false" comparison
+
+mlx5_cur and mlx5_new cannot have negative values so remove the
+redundant condition.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/qp.c |    2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -1550,7 +1550,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
+       mlx5_cur = to_mlx5_state(cur_state);
+       mlx5_new = to_mlx5_state(new_state);
+       mlx5_st = to_mlx5_st(ibqp->qp_type);
+-      if (mlx5_cur < 0 || mlx5_new < 0 || mlx5_st < 0)
++      if (mlx5_st < 0)
+               goto out;
+       optpar = ib_mask_to_mlx5_opt(attr_mask);
diff --git a/linux-next-cherry-picks/0023-IB-mlx5-Update-opt-param-mask-for-RTS2RTS.patch b/linux-next-cherry-picks/0023-IB-mlx5-Update-opt-param-mask-for-RTS2RTS.patch
new file mode 100644 (file)
index 0000000..f36b959
--- /dev/null
@@ -0,0 +1,29 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Update opt param mask for RTS2RTS
+
+RTS to RTS transition should allow update of alternate path.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/qp.c |    6 ++++--
+ 1 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -1317,9 +1317,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
+                                         MLX5_QP_OPTPAR_RAE            |
+                                         MLX5_QP_OPTPAR_RWE            |
+                                         MLX5_QP_OPTPAR_RNR_TIMEOUT    |
+-                                        MLX5_QP_OPTPAR_PM_STATE,
++                                        MLX5_QP_OPTPAR_PM_STATE       |
++                                        MLX5_QP_OPTPAR_ALT_ADDR_PATH,
+                       [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE            |
+-                                        MLX5_QP_OPTPAR_PM_STATE,
++                                        MLX5_QP_OPTPAR_PM_STATE       |
++                                        MLX5_QP_OPTPAR_ALT_ADDR_PATH,
+                       [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY          |
+                                         MLX5_QP_OPTPAR_SRQN           |
+                                         MLX5_QP_OPTPAR_CQN_RCV,
diff --git a/linux-next-cherry-picks/0024-mlx5-Use-enum-to-indicate-adapter-page-size.patch b/linux-next-cherry-picks/0024-mlx5-Use-enum-to-indicate-adapter-page-size.patch
new file mode 100644 (file)
index 0000000..ff4539d
--- /dev/null
@@ -0,0 +1,103 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] mlx5: Use enum to indicate adapter page size
+
+The Connect-IB adapter has an inherent page size which equals 4K.
+Define an new enum that equals the page shift and use it instead of
+using the value 12 throughout the code.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/cq.c              |    2 +-
+ drivers/infiniband/hw/mlx5/qp.c              |    5 +++--
+ drivers/infiniband/hw/mlx5/srq.c             |    4 ++--
+ drivers/net/ethernet/mellanox/mlx5/core/eq.c |    2 +-
+ include/linux/mlx5/device.h                  |    4 ++++
+ 5 files changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/cq.c
++++ b/drivers/infiniband/hw/mlx5/cq.c
+@@ -620,7 +620,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+       }
+       mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
+-      (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - 12;
++      (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+       *index = dev->mdev.priv.uuari.uars[0].index;
+       return 0;
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -551,7 +551,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+       }
+       mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
+       (*in)->ctx.log_pg_sz_remote_qpn =
+-              cpu_to_be32((page_shift - 12) << 24);
++              cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
+       (*in)->ctx.params2 = cpu_to_be32(offset << 6);
+       (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
+@@ -648,7 +648,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
+               goto err_buf;
+       }
+       (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
+-      (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - 12) << 24);
++      (*in)->ctx.log_pg_sz_remote_qpn =
++              cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
+       /* Set "fast registration enabled" for all kernel QPs */
+       (*in)->ctx.params1 |= cpu_to_be32(1 << 11);
+       (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
+diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/srq.c
++++ b/drivers/infiniband/hw/mlx5/srq.c
+@@ -123,7 +123,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
+               goto err_in;
+       }
+-      (*in)->ctx.log_pg_sz = page_shift - 12;
++      (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+       (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
+       return 0;
+@@ -192,7 +192,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
+       }
+       srq->wq_sig = !!srq_signature;
+-      (*in)->ctx.log_pg_sz = page_shift - 12;
++      (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+       return 0;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+@@ -354,7 +354,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
+       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ);
+       in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index);
+       in->ctx.intr = vecidx;
+-      in->ctx.log_page_size = eq->buf.page_shift - 12;
++      in->ctx.log_page_size = eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+       in->events_mask = cpu_to_be64(mask);
+       err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/device.h
++++ b/include/linux/mlx5/device.h
+@@ -231,6 +231,10 @@ enum {
+ };
+ enum {
++      MLX5_ADAPTER_PAGE_SHIFT         = 12
++};
++
++enum {
+       MLX5_CAP_OFF_DCT                = 41,
+       MLX5_CAP_OFF_CMDIF_CSUM         = 46,
+ };
diff --git a/linux-next-cherry-picks/0025-IB-mlx5-Remove-dead-code.patch b/linux-next-cherry-picks/0025-IB-mlx5-Remove-dead-code.patch
new file mode 100644 (file)
index 0000000..8301cd2
--- /dev/null
@@ -0,0 +1,33 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Remove dead code
+
+The value of the local variable index is never used in reg_mr_callback().
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+
+[ Remove now-unused variable delta too.  - Roland ]
+
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/mr.c |    6 ------
+ 1 files changed, 0 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -72,14 +72,8 @@ static void reg_mr_callback(int status, void *context)
+       int c = order2idx(dev, mr->order);
+       struct mlx5_cache_ent *ent = &cache->ent[c];
+       u8 key;
+-      unsigned long delta = jiffies - mr->start;
+-      unsigned long index;
+       unsigned long flags;
+-      index = find_last_bit(&delta, 8 * sizeof(delta));
+-      if (index == 64)
+-              index = 0;
+-
+       spin_lock_irqsave(&ent->lock, flags);
+       ent->pending--;
+       spin_unlock_irqrestore(&ent->lock, flags);
diff --git a/linux-next-cherry-picks/0026-IB-mlx5-Fix-list_del-of-empty-list.patch b/linux-next-cherry-picks/0026-IB-mlx5-Fix-list_del-of-empty-list.patch
new file mode 100644 (file)
index 0000000..7ced1d9
--- /dev/null
@@ -0,0 +1,26 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Fix list_del of empty list
+
+For archs with pages size of 4K, when the chunk is freed, fwp is not in the
+list so avoid attempting to delete it.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ .../net/ethernet/mellanox/mlx5/core/pagealloc.c    |    3 ++-
+ 1 files changed, 2 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+@@ -229,7 +229,8 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr)
+       set_bit(n, &fwp->bitmask);
+       if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
+               rb_erase(&fwp->rb_node, &dev->priv.page_root);
+-              list_del(&fwp->list);
++              if (fwp->free_count != 1)
++                      list_del(&fwp->list);
+               dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+               __free_page(fwp->page);
+               kfree(fwp);
diff --git a/linux-next-cherry-picks/0027-IB-mlx5-Fix-page-shift-in-create-CQ-for-userspace.patch b/linux-next-cherry-picks/0027-IB-mlx5-Fix-page-shift-in-create-CQ-for-userspace.patch
new file mode 100644 (file)
index 0000000..b58309c
--- /dev/null
@@ -0,0 +1,24 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Fix page shift in create CQ for userspace
+
+When creating a CQ, we must use mlx5 adapter page shift.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/cq.c |    2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/cq.c
++++ b/drivers/infiniband/hw/mlx5/cq.c
+@@ -556,7 +556,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
+               goto err_db;
+       }
+       mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
+-      (*cqb)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
++      (*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+       *index = to_mucontext(context)->uuari.uars[0].index;
diff --git a/linux-next-cherry-picks/0028-IB-mlx5-Remove-unused-code-in-mr.c.patch b/linux-next-cherry-picks/0028-IB-mlx5-Remove-unused-code-in-mr.c.patch
new file mode 100644 (file)
index 0000000..cd94d14
--- /dev/null
@@ -0,0 +1,36 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Remove unused code in mr.c
+
+The variable start in struct mlx5_ib_mr is never used. Remove it.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/mlx5_ib.h |    1 -
+ drivers/infiniband/hw/mlx5/mr.c      |    1 -
+ 2 files changed, 0 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+@@ -264,7 +264,6 @@ struct mlx5_ib_mr {
+       enum ib_wc_status       status;
+       struct mlx5_ib_dev     *dev;
+       struct mlx5_create_mkey_mbox_out out;
+-      unsigned long           start;
+ };
+ struct mlx5_ib_fast_reg_page_list {
+diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -146,7 +146,6 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
+               spin_lock_irq(&ent->lock);
+               ent->pending++;
+               spin_unlock_irq(&ent->lock);
+-              mr->start = jiffies;
+               err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
+                                           sizeof(*in), reg_mr_callback,
+                                           mr, &mr->out);
diff --git a/linux-next-cherry-picks/0029-mlx5_core-Remove-dead-code.patch b/linux-next-cherry-picks/0029-mlx5_core-Remove-dead-code.patch
new file mode 100644 (file)
index 0000000..a7d65c3
--- /dev/null
@@ -0,0 +1,27 @@
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Subject: [PATCH] mlx5_core: Remove dead code
+
+Remove leftover of debug code.
+
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ .../net/ethernet/mellanox/mlx5/core/pagealloc.c    |    4 +---
+ 1 files changed, 1 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+@@ -192,10 +192,8 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
+       struct fw_page *fp;
+       unsigned n;
+-      if (list_empty(&dev->priv.free_list)) {
++      if (list_empty(&dev->priv.free_list))
+               return -ENOMEM;
+-              mlx5_core_warn(dev, "\n");
+-      }
+       fp = list_entry(dev->priv.free_list.next, struct fw_page, list);
+       n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
diff --git a/linux-next-cherry-picks/0030-IB-mlx5-Fix-micro-UAR-allocator.patch b/linux-next-cherry-picks/0030-IB-mlx5-Fix-micro-UAR-allocator.patch
new file mode 100644 (file)
index 0000000..f5a7db4
--- /dev/null
@@ -0,0 +1,200 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Fix micro UAR allocator
+
+The micro UAR (uuar) allocator had a bug which resulted from the fact
+that in each UAR we only have two micro UARs avaialable, those at
+index 0 and 1.  This patch defines iterators to aid in traversing the
+list of available micro UARs when allocating a uuar.
+
+In addition, change the logic in create_user_qp() so that if high
+class allocation fails (high class means lower latency), we revert to
+medium class and not to the low class.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/main.c |   13 ++++--
+ drivers/infiniband/hw/mlx5/qp.c   |   77 ++++++++++++++++++++++++++++---------
+ include/linux/mlx5/device.h       |    7 ++-
+ 3 files changed, 70 insertions(+), 27 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/main.c
++++ b/drivers/infiniband/hw/mlx5/main.c
+@@ -541,6 +541,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
+       struct mlx5_ib_ucontext *context;
+       struct mlx5_uuar_info *uuari;
+       struct mlx5_uar *uars;
++      int gross_uuars;
+       int num_uars;
+       int uuarn;
+       int err;
+@@ -559,11 +560,13 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
+       if (req.total_num_uuars == 0)
+               return ERR_PTR(-EINVAL);
+-      req.total_num_uuars = ALIGN(req.total_num_uuars, MLX5_BF_REGS_PER_PAGE);
++      req.total_num_uuars = ALIGN(req.total_num_uuars,
++                                  MLX5_NON_FP_BF_REGS_PER_PAGE);
+       if (req.num_low_latency_uuars > req.total_num_uuars - 1)
+               return ERR_PTR(-EINVAL);
+-      num_uars = req.total_num_uuars / MLX5_BF_REGS_PER_PAGE;
++      num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
++      gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
+       resp.qp_tab_size      = 1 << dev->mdev.caps.log_max_qp;
+       resp.bf_reg_size      = dev->mdev.caps.bf_reg_size;
+       resp.cache_line_size  = L1_CACHE_BYTES;
+@@ -585,7 +588,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
+               goto out_ctx;
+       }
+-      uuari->bitmap = kcalloc(BITS_TO_LONGS(req.total_num_uuars),
++      uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
+                               sizeof(*uuari->bitmap),
+                               GFP_KERNEL);
+       if (!uuari->bitmap) {
+@@ -595,13 +598,13 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
+       /*
+        * clear all fast path uuars
+        */
+-      for (i = 0; i < req.total_num_uuars; i++) {
++      for (i = 0; i < gross_uuars; i++) {
+               uuarn = i & 3;
+               if (uuarn == 2 || uuarn == 3)
+                       set_bit(i, uuari->bitmap);
+       }
+-      uuari->count = kcalloc(req.total_num_uuars, sizeof(*uuari->count), GFP_KERNEL);
++      uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
+       if (!uuari->count) {
+               err = -ENOMEM;
+               goto out_bitmap;
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -340,14 +340,57 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
+       return 1;
+ }
++static int first_med_uuar(void)
++{
++      return 1;
++}
++
++static int next_uuar(int n)
++{
++      n++;
++
++      while (((n % 4) & 2))
++              n++;
++
++      return n;
++}
++
++static int num_med_uuar(struct mlx5_uuar_info *uuari)
++{
++      int n;
++
++      n = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE -
++              uuari->num_low_latency_uuars - 1;
++
++      return n >= 0 ? n : 0;
++}
++
++static int max_uuari(struct mlx5_uuar_info *uuari)
++{
++      return uuari->num_uars * 4;
++}
++
++static int first_hi_uuar(struct mlx5_uuar_info *uuari)
++{
++      int med;
++      int i;
++      int t;
++
++      med = num_med_uuar(uuari);
++      for (t = 0, i = first_med_uuar();; i = next_uuar(i)) {
++              t++;
++              if (t == med)
++                      return next_uuar(i);
++      }
++
++      return 0;
++}
++
+ static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
+ {
+-      int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
+-      int start_uuar;
+       int i;
+-      start_uuar = nuuars - uuari->num_low_latency_uuars;
+-      for (i = start_uuar; i < nuuars; i++) {
++      for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) {
+               if (!test_bit(i, uuari->bitmap)) {
+                       set_bit(i, uuari->bitmap);
+                       uuari->count[i]++;
+@@ -360,19 +403,10 @@ static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
+ static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
+ {
+-      int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
+-      int minidx = 1;
+-      int uuarn;
+-      int end;
++      int minidx = first_med_uuar();
+       int i;
+-      end = nuuars - uuari->num_low_latency_uuars;
+-
+-      for (i = 1; i < end; i++) {
+-              uuarn = i & 3;
+-              if (uuarn == 2 || uuarn == 3)
+-                      continue;
+-
++      for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) {
+               if (uuari->count[i] < uuari->count[minidx])
+                       minidx = i;
+       }
+@@ -510,11 +544,16 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+       uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
+       if (uuarn < 0) {
+               mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
+-              mlx5_ib_dbg(dev, "reverting to high latency\n");
+-              uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
++              mlx5_ib_dbg(dev, "reverting to medium latency\n");
++              uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
+               if (uuarn < 0) {
+-                      mlx5_ib_dbg(dev, "uuar allocation failed\n");
+-                      return uuarn;
++                      mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
++                      mlx5_ib_dbg(dev, "reverting to high latency\n");
++                      uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
++                      if (uuarn < 0) {
++                              mlx5_ib_warn(dev, "uuar allocation failed\n");
++                              return uuarn;
++                      }
+               }
+       }
+diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/device.h
++++ b/include/linux/mlx5/device.h
+@@ -104,9 +104,10 @@ enum {
+ };
+ enum {
+-      MLX5_BF_REGS_PER_PAGE   = 4,
+-      MLX5_MAX_UAR_PAGES      = 1 << 8,
+-      MLX5_MAX_UUARS          = MLX5_MAX_UAR_PAGES * MLX5_BF_REGS_PER_PAGE,
++      MLX5_BF_REGS_PER_PAGE           = 4,
++      MLX5_MAX_UAR_PAGES              = 1 << 8,
++      MLX5_NON_FP_BF_REGS_PER_PAGE    = 2,
++      MLX5_MAX_UUARS  = MLX5_MAX_UAR_PAGES * MLX5_NON_FP_BF_REGS_PER_PAGE,
+ };
+ enum {
diff --git a/linux-next-cherry-picks/0031-mlx5_core-Fix-out-arg-size-in-access_register-comman.patch b/linux-next-cherry-picks/0031-mlx5_core-Fix-out-arg-size-in-access_register-comman.patch
new file mode 100644 (file)
index 0000000..aa71a3b
--- /dev/null
@@ -0,0 +1,26 @@
+From: Haggai Eran <haggaie@mellanox.com>
+Subject: [PATCH] mlx5_core: Fix out arg size in access_register command
+
+The output size should be the sum of the core access reg output struct
+plus the size of the specific register data provided by the caller.
+
+Signed-off-by: Haggai Eran <haggaie@mellanox.com>
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/port.c |    2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
+@@ -57,7 +57,7 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
+       in->arg = cpu_to_be32(arg);
+       in->register_id = cpu_to_be16(reg_num);
+       err = mlx5_cmd_exec(dev, in, sizeof(*in) + size_in, out,
+-                          sizeof(out) + size_out);
++                          sizeof(*out) + size_out);
+       if (err)
+               goto ex2;
diff --git a/linux-next-cherry-picks/0032-IB-mlx5-Clear-out-struct-before-create-QP-command.patch b/linux-next-cherry-picks/0032-IB-mlx5-Clear-out-struct-before-create-QP-command.patch
new file mode 100644 (file)
index 0000000..480650b
--- /dev/null
@@ -0,0 +1,25 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Clear out struct before create QP command
+
+Output structs are expected by firmware to be cleared when a command is called.
+Clear the "out" struct instead of "dout" which is used only later.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/qp.c |    2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+@@ -74,7 +74,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
+       struct mlx5_destroy_qp_mbox_out dout;
+       int err;
+-      memset(&dout, 0, sizeof(dout));
++      memset(&out, 0, sizeof(out));
+       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP);
+       err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
diff --git a/linux-next-cherry-picks/0033-mlx5_core-Use-mlx5-core-style-warning.patch b/linux-next-cherry-picks/0033-mlx5_core-Use-mlx5-core-style-warning.patch
new file mode 100644 (file)
index 0000000..5a14d77
--- /dev/null
@@ -0,0 +1,26 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] mlx5_core: Use mlx5 core style warning
+
+Use mlx5_core_warn(), which is the standard warning emitter function, instead
+of pr_warn().
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/qp.c |    3 ++-
+ 1 files changed, 2 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+@@ -84,7 +84,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
+       }
+       if (out.hdr.status) {
+-              pr_warn("current num of QPs 0x%x\n", atomic_read(&dev->num_qps));
++              mlx5_core_warn(dev, "current num of QPs 0x%x\n",
++                             atomic_read(&dev->num_qps));
+               return mlx5_cmd_status_to_err(&out.hdr);
+       }
diff --git a/linux-next-cherry-picks/0034-IB-mlx5-Make-sure-doorbell-record-is-visible-before-.patch b/linux-next-cherry-picks/0034-IB-mlx5-Make-sure-doorbell-record-is-visible-before-.patch
new file mode 100644 (file)
index 0000000..756bad8
--- /dev/null
@@ -0,0 +1,27 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Make sure doorbell record is visible before doorbell
+
+Put a wmb() to make sure the doorbell record is visible to the HCA before we
+hit doorbell.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/qp.c |    4 ++++
+ 1 files changed, 4 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -2251,6 +2251,10 @@ out:
+               qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
++              /* Make sure doorbell record is visible to the HCA before
++               * we hit doorbell */
++              wmb();
++
+               if (bf->need_lock)
+                       spin_lock(&bf->lock);
diff --git a/linux-next-cherry-picks/0035-IB-mlx5-Implement-modify-CQ.patch b/linux-next-cherry-picks/0035-IB-mlx5-Implement-modify-CQ.patch
new file mode 100644 (file)
index 0000000..932c35d
--- /dev/null
@@ -0,0 +1,148 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Implement modify CQ
+
+Modify CQ is used by ULPs like IPoIB to change moderation parameters.  This
+patch adds support in mlx5.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/cq.c              |   26 +++++++++++++++++++++++++-
+ drivers/net/ethernet/mellanox/mlx5/core/cq.c |   17 +++++++++++++++--
+ include/linux/mlx5/cq.h                      |    8 ++++----
+ include/linux/mlx5/device.h                  |   15 +++++++++++++++
+ 4 files changed, 59 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/cq.c
++++ b/drivers/infiniband/hw/mlx5/cq.c
+@@ -818,7 +818,31 @@ void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
+ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+ {
+-      return -ENOSYS;
++      struct mlx5_modify_cq_mbox_in *in;
++      struct mlx5_ib_dev *dev = to_mdev(cq->device);
++      struct mlx5_ib_cq *mcq = to_mcq(cq);
++      int err;
++      u32 fsel;
++
++      if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER))
++              return -ENOSYS;
++
++      in = kzalloc(sizeof(*in), GFP_KERNEL);
++      if (!in)
++              return -ENOMEM;
++
++      in->cqn = cpu_to_be32(mcq->mcq.cqn);
++      fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
++      in->ctx.cq_period = cpu_to_be16(cq_period);
++      in->ctx.cq_max_count = cpu_to_be16(cq_count);
++      in->field_select = cpu_to_be32(fsel);
++      err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in);
++      kfree(in);
++
++      if (err)
++              mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);
++
++      return err;
+ }
+ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+@@ -201,10 +201,23 @@ EXPORT_SYMBOL(mlx5_core_query_cq);
+ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+-                      int type, struct mlx5_cq_modify_params *params)
++                      struct mlx5_modify_cq_mbox_in *in)
+ {
+-      return -ENOSYS;
++      struct mlx5_modify_cq_mbox_out out;
++      int err;
++
++      memset(&out, 0, sizeof(out));
++      in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MODIFY_CQ);
++      err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
++      if (err)
++              return err;
++
++      if (out.hdr.status)
++              return mlx5_cmd_status_to_err(&out.hdr);
++
++      return 0;
+ }
++EXPORT_SYMBOL(mlx5_core_modify_cq);
+ int mlx5_init_cq_table(struct mlx5_core_dev *dev)
+ {
+diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/cq.h
++++ b/include/linux/mlx5/cq.h
+@@ -85,9 +85,9 @@ enum {
+ };
+ enum {
+-      MLX5_CQ_MODIFY_RESEIZE = 0,
+-      MLX5_CQ_MODIFY_MODER = 1,
+-      MLX5_CQ_MODIFY_MAPPING = 2,
++      MLX5_CQ_MODIFY_PERIOD   = 1 << 0,
++      MLX5_CQ_MODIFY_COUNT    = 1 << 1,
++      MLX5_CQ_MODIFY_OVERRUN  = 1 << 2,
+ };
+ struct mlx5_cq_modify_params {
+@@ -158,7 +158,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
+ int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+                      struct mlx5_query_cq_mbox_out *out);
+ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+-                      int type, struct mlx5_cq_modify_params *params);
++                      struct mlx5_modify_cq_mbox_in *in);
+ int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
+ void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
+diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/device.h
++++ b/include/linux/mlx5/device.h
+@@ -177,6 +177,7 @@ enum {
+       MLX5_DEV_CAP_FLAG_APM           = 1LL << 17,
+       MLX5_DEV_CAP_FLAG_ATOMIC        = 1LL << 18,
+       MLX5_DEV_CAP_FLAG_ON_DMND_PG    = 1LL << 24,
++      MLX5_DEV_CAP_FLAG_CQ_MODER      = 1LL << 29,
+       MLX5_DEV_CAP_FLAG_RESIZE_SRQ    = 1LL << 32,
+       MLX5_DEV_CAP_FLAG_REMOTE_FENCE  = 1LL << 38,
+       MLX5_DEV_CAP_FLAG_TLP_HINTS     = 1LL << 39,
+@@ -698,6 +699,19 @@ struct mlx5_query_cq_mbox_out {
+       __be64                  pas[0];
+ };
++struct mlx5_modify_cq_mbox_in {
++      struct mlx5_inbox_hdr   hdr;
++      __be32                  cqn;
++      __be32                  field_select;
++      struct mlx5_cq_context  ctx;
++      u8                      rsvd[192];
++      __be64                  pas[0];
++};
++
++struct mlx5_modify_cq_mbox_out {
++      struct mlx5_outbox_hdr  hdr;
++};
++
+ struct mlx5_enable_hca_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       u8                      rsvd[8];
+@@ -872,6 +886,7 @@ struct mlx5_modify_mkey_mbox_in {
+ struct mlx5_modify_mkey_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
++      u8                      rsvd[8];
+ };
+ struct mlx5_dump_mkey_mbox_in {
diff --git a/linux-next-cherry-picks/0036-IB-mlx5-Add-support-for-resize-CQ.patch b/linux-next-cherry-picks/0036-IB-mlx5-Add-support-for-resize-CQ.patch
new file mode 100644 (file)
index 0000000..e5a01a6
--- /dev/null
@@ -0,0 +1,504 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Add support for resize CQ
+
+Implement resize CQ which is a mandatory verb in mlx5.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/cq.c              |  282 ++++++++++++++++++++++++--
+ drivers/infiniband/hw/mlx5/mlx5_ib.h         |    3 +-
+ drivers/infiniband/hw/mlx5/user.h            |    3 +
+ drivers/net/ethernet/mellanox/mlx5/core/cq.c |    4 +-
+ include/linux/mlx5/cq.h                      |   12 +-
+ include/linux/mlx5/device.h                  |    2 +
+ 6 files changed, 284 insertions(+), 22 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/cq.c
++++ b/drivers/infiniband/hw/mlx5/cq.c
+@@ -73,14 +73,24 @@ static void *get_cqe(struct mlx5_ib_cq *cq, int n)
+       return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
+ }
++static u8 sw_ownership_bit(int n, int nent)
++{
++      return (n & nent) ? 1 : 0;
++}
++
+ static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
+ {
+       void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
+       struct mlx5_cqe64 *cqe64;
+       cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
+-      return ((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^
+-              !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
++
++      if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
++          !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
++              return cqe;
++      } else {
++              return NULL;
++      }
+ }
+ static void *next_cqe_sw(struct mlx5_ib_cq *cq)
+@@ -351,6 +361,11 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
+       qp->sq.last_poll = tail;
+ }
++static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
++{
++      mlx5_buf_free(&dev->mdev, &buf->buf);
++}
++
+ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
+                        struct mlx5_ib_qp **cur_qp,
+                        struct ib_wc *wc)
+@@ -366,6 +381,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
+       void *cqe;
+       int idx;
++repoll:
+       cqe = next_cqe_sw(cq);
+       if (!cqe)
+               return -EAGAIN;
+@@ -379,7 +395,18 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
+        */
+       rmb();
+-      /* TBD: resize CQ */
++      opcode = cqe64->op_own >> 4;
++      if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
++              if (likely(cq->resize_buf)) {
++                      free_cq_buf(dev, &cq->buf);
++                      cq->buf = *cq->resize_buf;
++                      kfree(cq->resize_buf);
++                      cq->resize_buf = NULL;
++                      goto repoll;
++              } else {
++                      mlx5_ib_warn(dev, "unexpected resize cqe\n");
++              }
++      }
+       qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
+       if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
+@@ -398,7 +425,6 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
+       }
+       wc->qp  = &(*cur_qp)->ibqp;
+-      opcode = cqe64->op_own >> 4;
+       switch (opcode) {
+       case MLX5_CQE_REQ:
+               wq = &(*cur_qp)->sq;
+@@ -503,15 +529,11 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
+               return err;
+       buf->cqe_size = cqe_size;
++      buf->nent = nent;
+       return 0;
+ }
+-static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
+-{
+-      mlx5_buf_free(&dev->mdev, &buf->buf);
+-}
+-
+ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
+                         struct ib_ucontext *context, struct mlx5_ib_cq *cq,
+                         int entries, struct mlx5_create_cq_mbox_in **cqb,
+@@ -576,16 +598,16 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
+       ib_umem_release(cq->buf.umem);
+ }
+-static void init_cq_buf(struct mlx5_ib_cq *cq, int nent)
++static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
+ {
+       int i;
+       void *cqe;
+       struct mlx5_cqe64 *cqe64;
+-      for (i = 0; i < nent; i++) {
+-              cqe = get_cqe(cq, i);
+-              cqe64 = (cq->buf.cqe_size == 64) ? cqe : cqe + 64;
+-              cqe64->op_own = 0xf1;
++      for (i = 0; i < buf->nent; i++) {
++              cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
++              cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
++              cqe64->op_own = MLX5_CQE_INVALID << 4;
+       }
+ }
+@@ -610,7 +632,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+       if (err)
+               goto err_db;
+-      init_cq_buf(cq, entries);
++      init_cq_buf(cq, &cq->buf);
+       *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
+       *cqb = mlx5_vzalloc(*inlen);
+@@ -836,7 +858,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+       in->ctx.cq_period = cpu_to_be16(cq_period);
+       in->ctx.cq_max_count = cpu_to_be16(cq_count);
+       in->field_select = cpu_to_be32(fsel);
+-      err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in);
++      err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in, sizeof(*in));
+       kfree(in);
+       if (err)
+@@ -845,9 +867,235 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+       return err;
+ }
++static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
++                     int entries, struct ib_udata *udata, int *npas,
++                     int *page_shift, int *cqe_size)
++{
++      struct mlx5_ib_resize_cq ucmd;
++      struct ib_umem *umem;
++      int err;
++      int npages;
++      struct ib_ucontext *context = cq->buf.umem->context;
++
++      if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
++              return -EFAULT;
++
++      umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size,
++                         IB_ACCESS_LOCAL_WRITE, 1);
++      if (IS_ERR(umem)) {
++              err = PTR_ERR(umem);
++              return err;
++      }
++
++      mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift,
++                         npas, NULL);
++
++      cq->resize_umem = umem;
++      *cqe_size = ucmd.cqe_size;
++
++      return 0;
++}
++
++static void un_resize_user(struct mlx5_ib_cq *cq)
++{
++      ib_umem_release(cq->resize_umem);
++}
++
++static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
++                       int entries, int cqe_size)
++{
++      int err;
++
++      cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL);
++      if (!cq->resize_buf)
++              return -ENOMEM;
++
++      err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
++      if (err)
++              goto ex;
++
++      init_cq_buf(cq, cq->resize_buf);
++
++      return 0;
++
++ex:
++      kfree(cq->resize_buf);
++      return err;
++}
++
++static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
++{
++      free_cq_buf(dev, cq->resize_buf);
++      cq->resize_buf = NULL;
++}
++
++static int copy_resize_cqes(struct mlx5_ib_cq *cq)
++{
++      struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
++      struct mlx5_cqe64 *scqe64;
++      struct mlx5_cqe64 *dcqe64;
++      void *start_cqe;
++      void *scqe;
++      void *dcqe;
++      int ssize;
++      int dsize;
++      int i;
++      u8 sw_own;
++
++      ssize = cq->buf.cqe_size;
++      dsize = cq->resize_buf->cqe_size;
++      if (ssize != dsize) {
++              mlx5_ib_warn(dev, "resize from different cqe size is not supported\n");
++              return -EINVAL;
++      }
++
++      i = cq->mcq.cons_index;
++      scqe = get_sw_cqe(cq, i);
++      scqe64 = ssize == 64 ? scqe : scqe + 64;
++      start_cqe = scqe;
++      if (!scqe) {
++              mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
++              return -EINVAL;
++      }
++
++      while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
++              dcqe = get_cqe_from_buf(cq->resize_buf,
++                                      (i + 1) & (cq->resize_buf->nent),
++                                      dsize);
++              dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
++              sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
++              memcpy(dcqe, scqe, dsize);
++              dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;
++
++              ++i;
++              scqe = get_sw_cqe(cq, i);
++              scqe64 = ssize == 64 ? scqe : scqe + 64;
++              if (!scqe) {
++                      mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
++                      return -EINVAL;
++              }
++
++              if (scqe == start_cqe) {
++                      pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n",
++                              cq->mcq.cqn);
++                      return -ENOMEM;
++              }
++      }
++      ++cq->mcq.cons_index;
++      return 0;
++}
++
+ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
+ {
+-      return -ENOSYS;
++      struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
++      struct mlx5_ib_cq *cq = to_mcq(ibcq);
++      struct mlx5_modify_cq_mbox_in *in;
++      int err;
++      int npas;
++      int page_shift;
++      int inlen;
++      int uninitialized_var(cqe_size);
++      unsigned long flags;
++
++      if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) {
++              pr_info("Firmware does not support resize CQ\n");
++              return -ENOSYS;
++      }
++
++      if (entries < 1)
++              return -EINVAL;
++
++      entries = roundup_pow_of_two(entries + 1);
++      if (entries > dev->mdev.caps.max_cqes + 1)
++              return -EINVAL;
++
++      if (entries == ibcq->cqe + 1)
++              return 0;
++
++      mutex_lock(&cq->resize_mutex);
++      if (udata) {
++              err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
++                                &cqe_size);
++      } else {
++              cqe_size = 64;
++              err = resize_kernel(dev, cq, entries, cqe_size);
++              if (!err) {
++                      npas = cq->resize_buf->buf.npages;
++                      page_shift = cq->resize_buf->buf.page_shift;
++              }
++      }
++
++      if (err)
++              goto ex;
++
++      inlen = sizeof(*in) + npas * sizeof(in->pas[0]);
++      in = mlx5_vzalloc(inlen);
++      if (!in) {
++              err = -ENOMEM;
++              goto ex_resize;
++      }
++
++      if (udata)
++              mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
++                                   in->pas, 0);
++      else
++              mlx5_fill_page_array(&cq->resize_buf->buf, in->pas);
++
++      in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE  |
++                                     MLX5_MODIFY_CQ_MASK_PG_OFFSET |
++                                     MLX5_MODIFY_CQ_MASK_PG_SIZE);
++      in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
++      in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
++      in->ctx.page_offset = 0;
++      in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24);
++      in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE);
++      in->cqn = cpu_to_be32(cq->mcq.cqn);
++
++      err = mlx5_core_modify_cq(&dev->mdev, &cq->mcq, in, inlen);
++      if (err)
++              goto ex_alloc;
++
++      if (udata) {
++              cq->ibcq.cqe = entries - 1;
++              ib_umem_release(cq->buf.umem);
++              cq->buf.umem = cq->resize_umem;
++              cq->resize_umem = NULL;
++      } else {
++              struct mlx5_ib_cq_buf tbuf;
++              int resized = 0;
++
++              spin_lock_irqsave(&cq->lock, flags);
++              if (cq->resize_buf) {
++                      err = copy_resize_cqes(cq);
++                      if (!err) {
++                              tbuf = cq->buf;
++                              cq->buf = *cq->resize_buf;
++                              kfree(cq->resize_buf);
++                              cq->resize_buf = NULL;
++                              resized = 1;
++                      }
++              }
++              cq->ibcq.cqe = entries - 1;
++              spin_unlock_irqrestore(&cq->lock, flags);
++              if (resized)
++                      free_cq_buf(dev, &tbuf);
++      }
++      mutex_unlock(&cq->resize_mutex);
++
++      mlx5_vfree(in);
++      return 0;
++
++ex_alloc:
++      mlx5_vfree(in);
++
++ex_resize:
++      if (udata)
++              un_resize_user(cq);
++      else
++              un_resize_kernel(dev, cq);
++ex:
++      mutex_unlock(&cq->resize_mutex);
++      return err;
+ }
+ int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
+diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+@@ -195,6 +195,7 @@ struct mlx5_ib_cq_buf {
+       struct mlx5_buf         buf;
+       struct ib_umem          *umem;
+       int                     cqe_size;
++      int                     nent;
+ };
+ enum mlx5_ib_qp_flags {
+@@ -220,7 +221,7 @@ struct mlx5_ib_cq {
+       /* protect resize cq
+        */
+       struct mutex            resize_mutex;
+-      struct mlx5_ib_cq_resize *resize_buf;
++      struct mlx5_ib_cq_buf  *resize_buf;
+       struct ib_umem         *resize_umem;
+       int                     cqe_size;
+ };
+diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/user.h
++++ b/drivers/infiniband/hw/mlx5/user.h
+@@ -93,6 +93,9 @@ struct mlx5_ib_create_cq_resp {
+ struct mlx5_ib_resize_cq {
+       __u64   buf_addr;
++      __u16   cqe_size;
++      __u16   reserved0;
++      __u32   reserved1;
+ };
+ struct mlx5_ib_create_srq {
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+@@ -201,14 +201,14 @@ EXPORT_SYMBOL(mlx5_core_query_cq);
+ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+-                      struct mlx5_modify_cq_mbox_in *in)
++                      struct mlx5_modify_cq_mbox_in *in, int in_sz)
+ {
+       struct mlx5_modify_cq_mbox_out out;
+       int err;
+       memset(&out, 0, sizeof(out));
+       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MODIFY_CQ);
+-      err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
++      err = mlx5_cmd_exec(dev, in, in_sz, &out, sizeof(out));
+       if (err)
+               return err;
+diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/cq.h
++++ b/include/linux/mlx5/cq.h
+@@ -79,9 +79,10 @@ enum {
+       MLX5_CQE_RESP_SEND      = 2,
+       MLX5_CQE_RESP_SEND_IMM  = 3,
+       MLX5_CQE_RESP_SEND_INV  = 4,
+-      MLX5_CQE_RESIZE_CQ      = 0xff, /* TBD */
++      MLX5_CQE_RESIZE_CQ      = 5,
+       MLX5_CQE_REQ_ERR        = 13,
+       MLX5_CQE_RESP_ERR       = 14,
++      MLX5_CQE_INVALID        = 15,
+ };
+ enum {
+@@ -90,6 +91,13 @@ enum {
+       MLX5_CQ_MODIFY_OVERRUN  = 1 << 2,
+ };
++enum {
++      MLX5_CQ_OPMOD_RESIZE            = 1,
++      MLX5_MODIFY_CQ_MASK_LOG_SIZE    = 1 << 0,
++      MLX5_MODIFY_CQ_MASK_PG_OFFSET   = 1 << 1,
++      MLX5_MODIFY_CQ_MASK_PG_SIZE     = 1 << 2,
++};
++
+ struct mlx5_cq_modify_params {
+       int     type;
+       union {
+@@ -158,7 +166,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
+ int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+                      struct mlx5_query_cq_mbox_out *out);
+ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+-                      struct mlx5_modify_cq_mbox_in *in);
++                      struct mlx5_modify_cq_mbox_in *in, int in_sz);
+ int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
+ void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
+diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/device.h
++++ b/include/linux/mlx5/device.h
+@@ -178,6 +178,7 @@ enum {
+       MLX5_DEV_CAP_FLAG_ATOMIC        = 1LL << 18,
+       MLX5_DEV_CAP_FLAG_ON_DMND_PG    = 1LL << 24,
+       MLX5_DEV_CAP_FLAG_CQ_MODER      = 1LL << 29,
++      MLX5_DEV_CAP_FLAG_RESIZE_CQ     = 1LL << 30,
+       MLX5_DEV_CAP_FLAG_RESIZE_SRQ    = 1LL << 32,
+       MLX5_DEV_CAP_FLAG_REMOTE_FENCE  = 1LL << 38,
+       MLX5_DEV_CAP_FLAG_TLP_HINTS     = 1LL << 39,
+@@ -710,6 +711,7 @@ struct mlx5_modify_cq_mbox_in {
+ struct mlx5_modify_cq_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
++      u8                      rsvd[8];
+ };
+ struct mlx5_enable_hca_mbox_in {
diff --git a/linux-next-cherry-picks/0037-mlx5_core-Improve-debugfs-readability.patch b/linux-next-cherry-picks/0037-mlx5_core-Improve-debugfs-readability.patch
new file mode 100644 (file)
index 0000000..56dbf27
--- /dev/null
@@ -0,0 +1,155 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] mlx5_core: Improve debugfs readability
+
+Use strings to display transport service or state of QPs.  Use numeric
+value for MTU of a QP.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/debugfs.c |   39 +++++++++++++++---
+ include/linux/mlx5/qp.h                           |   45 +++++++++++++++++++++
+ 2 files changed, 78 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+@@ -275,7 +275,7 @@ void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev)
+ }
+ static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
+-                       int index)
++                       int index, int *is_str)
+ {
+       struct mlx5_query_qp_mbox_out *out;
+       struct mlx5_qp_context *ctx;
+@@ -293,19 +293,40 @@ static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
+               goto out;
+       }
++      *is_str = 0;
+       ctx = &out->ctx;
+       switch (index) {
+       case QP_PID:
+               param = qp->pid;
+               break;
+       case QP_STATE:
+-              param = be32_to_cpu(ctx->flags) >> 28;
++              param = (u64)mlx5_qp_state_str(be32_to_cpu(ctx->flags) >> 28);
++              *is_str = 1;
+               break;
+       case QP_XPORT:
+-              param = (be32_to_cpu(ctx->flags) >> 16) & 0xff;
++              param = (u64)mlx5_qp_type_str((be32_to_cpu(ctx->flags) >> 16) & 0xff);
++              *is_str = 1;
+               break;
+       case QP_MTU:
+-              param = ctx->mtu_msgmax >> 5;
++              switch (ctx->mtu_msgmax >> 5) {
++              case IB_MTU_256:
++                      param = 256;
++                      break;
++              case IB_MTU_512:
++                      param = 512;
++                      break;
++              case IB_MTU_1024:
++                      param = 1024;
++                      break;
++              case IB_MTU_2048:
++                      param = 2048;
++                      break;
++              case IB_MTU_4096:
++                      param = 4096;
++                      break;
++              default:
++                      param = 0;
++              }
+               break;
+       case QP_N_RECV:
+               param = 1 << ((ctx->rq_size_stride >> 3) & 0xf);
+@@ -414,6 +435,7 @@ static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count,
+       struct mlx5_field_desc *desc;
+       struct mlx5_rsc_debug *d;
+       char tbuf[18];
++      int is_str = 0;
+       u64 field;
+       int ret;
+@@ -424,7 +446,7 @@ static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count,
+       d = (void *)(desc - desc->i) - sizeof(*d);
+       switch (d->type) {
+       case MLX5_DBG_RSC_QP:
+-              field = qp_read_field(d->dev, d->object, desc->i);
++              field = qp_read_field(d->dev, d->object, desc->i, &is_str);
+               break;
+       case MLX5_DBG_RSC_EQ:
+@@ -440,7 +462,12 @@ static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count,
+               return -EINVAL;
+       }
+-      ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field);
++
++      if (is_str)
++              ret = snprintf(tbuf, sizeof(tbuf), "%s\n", (const char *)field);
++      else
++              ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field);
++
+       if (ret > 0) {
+               if (copy_to_user(buf, tbuf, ret))
+                       return -EFAULT;
+diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/qp.h
++++ b/include/linux/mlx5/qp.h
+@@ -464,4 +464,49 @@ void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev);
+ int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
+ void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
++static inline const char *mlx5_qp_type_str(int type)
++{
++      switch (type) {
++      case MLX5_QP_ST_RC: return "RC";
++      case MLX5_QP_ST_UC: return "C";
++      case MLX5_QP_ST_UD: return "UD";
++      case MLX5_QP_ST_XRC: return "XRC";
++      case MLX5_QP_ST_MLX: return "MLX";
++      case MLX5_QP_ST_QP0: return "QP0";
++      case MLX5_QP_ST_QP1: return "QP1";
++      case MLX5_QP_ST_RAW_ETHERTYPE: return "RAW_ETHERTYPE";
++      case MLX5_QP_ST_RAW_IPV6: return "RAW_IPV6";
++      case MLX5_QP_ST_SNIFFER: return "SNIFFER";
++      case MLX5_QP_ST_SYNC_UMR: return "SYNC_UMR";
++      case MLX5_QP_ST_PTP_1588: return "PTP_1588";
++      case MLX5_QP_ST_REG_UMR: return "REG_UMR";
++      default: return "Invalid transport type";
++      }
++}
++
++static inline const char *mlx5_qp_state_str(int state)
++{
++      switch (state) {
++      case MLX5_QP_STATE_RST:
++      return "RST";
++      case MLX5_QP_STATE_INIT:
++      return "INIT";
++      case MLX5_QP_STATE_RTR:
++      return "RTR";
++      case MLX5_QP_STATE_RTS:
++      return "RTS";
++      case MLX5_QP_STATE_SQER:
++      return "SQER";
++      case MLX5_QP_STATE_SQD:
++      return "SQD";
++      case MLX5_QP_STATE_ERR:
++      return "ERR";
++      case MLX5_QP_STATE_SQ_DRAINING:
++      return "SQ_DRAINING";
++      case MLX5_QP_STATE_SUSPENDED:
++      return "SUSPENDED";
++      default: return "Invalid QP state";
++      }
++}
++
+ #endif /* MLX5_QP_H */
diff --git a/linux-next-cherry-picks/0038-mlx5_core-Fix-PowerPC-support.patch b/linux-next-cherry-picks/0038-mlx5_core-Fix-PowerPC-support.patch
new file mode 100644 (file)
index 0000000..a2a242b
--- /dev/null
@@ -0,0 +1,67 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] mlx5_core: Fix PowerPC support
+
+1. Fix derivation of sub-page index from the dma address in free_4k.
+2. Fix the DMA address passed to dma_unmap_page by masking it properly.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ .../net/ethernet/mellanox/mlx5/core/pagealloc.c    |    9 +++++----
+ include/linux/mlx5/device.h                        |    3 ++-
+ 2 files changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+@@ -99,7 +99,7 @@ enum {
+ enum {
+       MLX5_MAX_RECLAIM_TIME_MILI      = 5000,
+-      MLX5_NUM_4K_IN_PAGE             = PAGE_SIZE / 4096,
++      MLX5_NUM_4K_IN_PAGE             = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE,
+ };
+ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
+@@ -206,7 +206,7 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
+       if (!fp->free_count)
+               list_del(&fp->list);
+-      *addr = fp->addr + n * 4096;
++      *addr = fp->addr + n * MLX5_ADAPTER_PAGE_SIZE;
+       return 0;
+ }
+@@ -222,14 +222,15 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr)
+               return;
+       }
+-      n = (addr & ~PAGE_MASK) % 4096;
++      n = (addr & ~PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
+       fwp->free_count++;
+       set_bit(n, &fwp->bitmask);
+       if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
+               rb_erase(&fwp->rb_node, &dev->priv.page_root);
+               if (fwp->free_count != 1)
+                       list_del(&fwp->list);
+-              dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
++              dma_unmap_page(&dev->pdev->dev, addr & PAGE_MASK, PAGE_SIZE,
++                             DMA_BIDIRECTIONAL);
+               __free_page(fwp->page);
+               kfree(fwp);
+       } else if (fwp->free_count == 1) {
+diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/device.h
++++ b/include/linux/mlx5/device.h
+@@ -234,7 +234,8 @@ enum {
+ };
+ enum {
+-      MLX5_ADAPTER_PAGE_SHIFT         = 12
++      MLX5_ADAPTER_PAGE_SHIFT         = 12,
++      MLX5_ADAPTER_PAGE_SIZE          = 1 << MLX5_ADAPTER_PAGE_SHIFT,
+ };
+ enum {
diff --git a/linux-next-cherry-picks/0039-IB-mlx5-Allow-creation-of-QPs-with-zero-length-work-.patch b/linux-next-cherry-picks/0039-IB-mlx5-Allow-creation-of-QPs-with-zero-length-work-.patch
new file mode 100644 (file)
index 0000000..52d7348
--- /dev/null
@@ -0,0 +1,107 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Allow creation of QPs with zero-length work queues
+
+The current code attempts to call ib_umem_get() even if the length is
+zero, which causes a failure. Since the spec allows zero length work
+queues, change the code so we don't call ib_umem_get() in those cases.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/qp.c |   49 +++++++++++++++++++++++----------------
+ 1 files changed, 29 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -523,12 +523,12 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ {
+       struct mlx5_ib_ucontext *context;
+       struct mlx5_ib_create_qp ucmd;
+-      int page_shift;
++      int page_shift = 0;
+       int uar_index;
+       int npages;
+-      u32 offset;
++      u32 offset = 0;
+       int uuarn;
+-      int ncont;
++      int ncont = 0;
+       int err;
+       err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
+@@ -564,23 +564,29 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+       if (err)
+               goto err_uuar;
+-      qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
+-                             qp->buf_size, 0, 0);
+-      if (IS_ERR(qp->umem)) {
+-              mlx5_ib_dbg(dev, "umem_get failed\n");
+-              err = PTR_ERR(qp->umem);
+-              goto err_uuar;
++      if (ucmd.buf_addr && qp->buf_size) {
++              qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
++                                     qp->buf_size, 0, 0);
++              if (IS_ERR(qp->umem)) {
++                      mlx5_ib_dbg(dev, "umem_get failed\n");
++                      err = PTR_ERR(qp->umem);
++                      goto err_uuar;
++              }
++      } else {
++              qp->umem = NULL;
+       }
+-      mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
+-                         &ncont, NULL);
+-      err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
+-      if (err) {
+-              mlx5_ib_warn(dev, "bad offset\n");
+-              goto err_umem;
++      if (qp->umem) {
++              mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
++                                 &ncont, NULL);
++              err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
++              if (err) {
++                      mlx5_ib_warn(dev, "bad offset\n");
++                      goto err_umem;
++              }
++              mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
++                          ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
+       }
+-      mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
+-                  ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
+       *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
+       *in = mlx5_vzalloc(*inlen);
+@@ -588,7 +594,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+               err = -ENOMEM;
+               goto err_umem;
+       }
+-      mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
++      if (qp->umem)
++              mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
+       (*in)->ctx.log_pg_sz_remote_qpn =
+               cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
+       (*in)->ctx.params2 = cpu_to_be32(offset << 6);
+@@ -619,7 +626,8 @@ err_free:
+       mlx5_vfree(*in);
+ err_umem:
+-      ib_umem_release(qp->umem);
++      if (qp->umem)
++              ib_umem_release(qp->umem);
+ err_uuar:
+       free_uuar(&context->uuari, uuarn);
+@@ -632,7 +640,8 @@ static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
+       context = to_mucontext(pd->uobject->context);
+       mlx5_ib_db_unmap_user(context, &qp->db);
+-      ib_umem_release(qp->umem);
++      if (qp->umem)
++              ib_umem_release(qp->umem);
+       free_uuar(&context->uuari, qp->uuarn);
+ }
diff --git a/linux-next-cherry-picks/0040-IB-mlx5-Abort-driver-cleanup-if-teardown-hca-fails.patch b/linux-next-cherry-picks/0040-IB-mlx5-Abort-driver-cleanup-if-teardown-hca-fails.patch
new file mode 100644 (file)
index 0000000..2ab02fb
--- /dev/null
@@ -0,0 +1,40 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Abort driver cleanup if teardown hca fails
+
+Do not continue with cleanup flow. If this ever happens we can check which
+resources remained open.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c |   10 ++++++++--
+ 1 files changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -460,7 +460,10 @@ disable_msix:
+ err_stop_poll:
+       mlx5_stop_health_poll(dev);
+-      mlx5_cmd_teardown_hca(dev);
++      if (mlx5_cmd_teardown_hca(dev)) {
++              dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
++              return err;
++      }
+ err_pagealloc_stop:
+       mlx5_pagealloc_stop(dev);
+@@ -503,7 +506,10 @@ void mlx5_dev_cleanup(struct mlx5_core_dev *dev)
+       mlx5_eq_cleanup(dev);
+       mlx5_disable_msix(dev);
+       mlx5_stop_health_poll(dev);
+-      mlx5_cmd_teardown_hca(dev);
++      if (mlx5_cmd_teardown_hca(dev)) {
++              dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
++              return;
++      }
+       mlx5_pagealloc_stop(dev);
+       mlx5_reclaim_startup_pages(dev);
+       mlx5_core_disable_hca(dev);
diff --git a/linux-next-cherry-picks/0041-IB-mlx5-Remove-old-field-for-create-mkey-mailbox.patch b/linux-next-cherry-picks/0041-IB-mlx5-Remove-old-field-for-create-mkey-mailbox.patch
new file mode 100644 (file)
index 0000000..270bac2
--- /dev/null
@@ -0,0 +1,26 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Remove old field for create mkey mailbox
+
+Match firmware specification.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ include/linux/mlx5/device.h |    4 ++--
+ 1 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/device.h
++++ b/include/linux/mlx5/device.h
+@@ -849,8 +849,8 @@ struct mlx5_create_mkey_mbox_in {
+       struct mlx5_mkey_seg    seg;
+       u8                      rsvd1[16];
+       __be32                  xlat_oct_act_size;
+-      __be32                  bsf_coto_act_size;
+-      u8                      rsvd2[168];
++      __be32                  rsvd2;
++      u8                      rsvd3[168];
+       __be64                  pas[0];
+ };
diff --git a/linux-next-cherry-picks/0042-IB-mlx5-Verify-reserved-fields-are-cleared.patch b/linux-next-cherry-picks/0042-IB-mlx5-Verify-reserved-fields-are-cleared.patch
new file mode 100644 (file)
index 0000000..89e7c98
--- /dev/null
@@ -0,0 +1,33 @@
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Verify reserved fields are cleared
+
+Verify that reserved fields in struct mlx5_ib_resize_cq are cleared
+before continuing execution of the verb. This is required to allow
+making use of this area in future revisions.
+
+Signed-off-by: Yann Droneaud <ydroneaud@opteya.com>
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/cq.c |    8 ++++++--
+ 1 files changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/cq.c
++++ b/drivers/infiniband/hw/mlx5/cq.c
+@@ -877,8 +877,12 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+       int npages;
+       struct ib_ucontext *context = cq->buf.umem->context;
+-      if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+-              return -EFAULT;
++      err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
++      if (err)
++              return err;
++
++      if (ucmd.reserved0 || ucmd.reserved1)
++              return -EINVAL;
+       umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size,
+                          IB_ACCESS_LOCAL_WRITE, 1);