--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Fix check of number of entries in create CQ
+
+Verify that the value is non-negative before rounding up to a power of 2.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/cq.c | 5 ++++-
+ 1 files changed, 4 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/cq.c
++++ b/drivers/infiniband/hw/mlx5/cq.c
+@@ -653,8 +653,11 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
+ int eqn;
+ int err;
+
++ if (entries < 0)
++ return ERR_PTR(-EINVAL);
++
+ entries = roundup_pow_of_two(entries + 1);
+- if (entries < 1 || entries > dev->mdev.caps.max_cqes)
++ if (entries > dev->mdev.caps.max_cqes)
+ return ERR_PTR(-EINVAL);
+
+ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Multithreaded create MR
+
+Use asynchronous commands to execute up to eight concurrent create MR
+commands. This is to fill memory caches faster so we keep consuming
+from there. Also, increase timeout for shrinking caches to five
+minutes.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/main.c | 3 +-
+ drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 +
+ drivers/infiniband/hw/mlx5/mr.c | 163 ++++++++++++++++-----
+ drivers/infiniband/hw/mlx5/qp.c | 4 +-
+ drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 106 ++++++++++----
+ drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 8 +-
+ drivers/net/ethernet/mellanox/mlx5/core/mr.c | 32 +++--
+ include/linux/mlx5/driver.h | 17 ++-
+ 8 files changed, 255 insertions(+), 84 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/main.c
++++ b/drivers/infiniband/hw/mlx5/main.c
+@@ -745,7 +745,8 @@ static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
+ seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+ seg->start_addr = 0;
+
+- err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in));
++ err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in),
++ NULL, NULL, NULL);
+ if (err) {
+ mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
+ goto err_in;
+diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+@@ -262,6 +262,9 @@ struct mlx5_ib_mr {
+ int npages;
+ struct completion done;
+ enum ib_wc_status status;
++ struct mlx5_ib_dev *dev;
++ struct mlx5_create_mkey_mbox_out out;
++ unsigned long start;
+ };
+
+ struct mlx5_ib_fast_reg_page_list {
+@@ -323,6 +326,7 @@ struct mlx5_cache_ent {
+ struct mlx5_ib_dev *dev;
+ struct work_struct work;
+ struct delayed_work dwork;
++ int pending;
+ };
+
+ struct mlx5_mr_cache {
+@@ -358,6 +362,8 @@ struct mlx5_ib_dev {
+ spinlock_t mr_lock;
+ struct mlx5_ib_resources devr;
+ struct mlx5_mr_cache cache;
++ struct timer_list delay_timer;
++ int fill_delay;
+ };
+
+ static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
+diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -35,11 +35,13 @@
+ #include <linux/random.h>
+ #include <linux/debugfs.h>
+ #include <linux/export.h>
++#include <linux/delay.h>
+ #include <rdma/ib_umem.h>
+ #include "mlx5_ib.h"
+
+ enum {
+ DEF_CACHE_SIZE = 10,
++ MAX_PENDING_REG_MR = 8,
+ };
+
+ enum {
+@@ -63,6 +65,57 @@ static int order2idx(struct mlx5_ib_dev *dev, int order)
+ return order - cache->ent[0].order;
+ }
+
++static void reg_mr_callback(int status, void *context)
++{
++ struct mlx5_ib_mr *mr = context;
++ struct mlx5_ib_dev *dev = mr->dev;
++ struct mlx5_mr_cache *cache = &dev->cache;
++ int c = order2idx(dev, mr->order);
++ struct mlx5_cache_ent *ent = &cache->ent[c];
++ u8 key;
++ unsigned long delta = jiffies - mr->start;
++ unsigned long index;
++ unsigned long flags;
++
++ index = find_last_bit(&delta, 8 * sizeof(delta));
++ if (index == 64)
++ index = 0;
++
++ spin_lock_irqsave(&ent->lock, flags);
++ ent->pending--;
++ spin_unlock_irqrestore(&ent->lock, flags);
++ if (status) {
++ mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
++ kfree(mr);
++ dev->fill_delay = 1;
++ mod_timer(&dev->delay_timer, jiffies + HZ);
++ return;
++ }
++
++ if (mr->out.hdr.status) {
++ mlx5_ib_warn(dev, "failed - status %d, syndorme 0x%x\n",
++ mr->out.hdr.status,
++ be32_to_cpu(mr->out.hdr.syndrome));
++ kfree(mr);
++ dev->fill_delay = 1;
++ mod_timer(&dev->delay_timer, jiffies + HZ);
++ return;
++ }
++
++ spin_lock_irqsave(&dev->mdev.priv.mkey_lock, flags);
++ key = dev->mdev.priv.mkey_key++;
++ spin_unlock_irqrestore(&dev->mdev.priv.mkey_lock, flags);
++ mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
++
++ cache->last_add = jiffies;
++
++ spin_lock_irqsave(&ent->lock, flags);
++ list_add_tail(&mr->list, &ent->head);
++ ent->cur++;
++ ent->size++;
++ spin_unlock_irqrestore(&ent->lock, flags);
++}
++
+ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
+ {
+ struct mlx5_mr_cache *cache = &dev->cache;
+@@ -78,36 +131,39 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
+ return -ENOMEM;
+
+ for (i = 0; i < num; i++) {
++ if (ent->pending >= MAX_PENDING_REG_MR) {
++ err = -EAGAIN;
++ break;
++ }
++
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr) {
+ err = -ENOMEM;
+- goto out;
++ break;
+ }
+ mr->order = ent->order;
+ mr->umred = 1;
++ mr->dev = dev;
+ in->seg.status = 1 << 6;
+ in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
+ in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+ in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
+ in->seg.log2_page_size = 12;
+
++ spin_lock_irq(&ent->lock);
++ ent->pending++;
++ spin_unlock_irq(&ent->lock);
++ mr->start = jiffies;
+ err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
+- sizeof(*in));
++ sizeof(*in), reg_mr_callback,
++ mr, &mr->out);
+ if (err) {
+ mlx5_ib_warn(dev, "create mkey failed %d\n", err);
+ kfree(mr);
+- goto out;
++ break;
+ }
+- cache->last_add = jiffies;
+-
+- spin_lock(&ent->lock);
+- list_add_tail(&mr->list, &ent->head);
+- ent->cur++;
+- ent->size++;
+- spin_unlock(&ent->lock);
+ }
+
+-out:
+ kfree(in);
+ return err;
+ }
+@@ -121,16 +177,16 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
+ int i;
+
+ for (i = 0; i < num; i++) {
+- spin_lock(&ent->lock);
++ spin_lock_irq(&ent->lock);
+ if (list_empty(&ent->head)) {
+- spin_unlock(&ent->lock);
++ spin_unlock_irq(&ent->lock);
+ return;
+ }
+ mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+ list_del(&mr->list);
+ ent->cur--;
+ ent->size--;
+- spin_unlock(&ent->lock);
++ spin_unlock_irq(&ent->lock);
+ err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
+ if (err)
+ mlx5_ib_warn(dev, "failed destroy mkey\n");
+@@ -162,9 +218,13 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
+ return -EINVAL;
+
+ if (var > ent->size) {
+- err = add_keys(dev, c, var - ent->size);
+- if (err)
+- return err;
++ do {
++ err = add_keys(dev, c, var - ent->size);
++ if (err && err != -EAGAIN)
++ return err;
++
++ usleep_range(3000, 5000);
++ } while (err);
+ } else if (var < ent->size) {
+ remove_keys(dev, c, ent->size - var);
+ }
+@@ -280,23 +340,37 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
+ struct mlx5_ib_dev *dev = ent->dev;
+ struct mlx5_mr_cache *cache = &dev->cache;
+ int i = order2idx(dev, ent->order);
++ int err;
+
+ if (cache->stopped)
+ return;
+
+ ent = &dev->cache.ent[i];
+- if (ent->cur < 2 * ent->limit) {
+- add_keys(dev, i, 1);
+- if (ent->cur < 2 * ent->limit)
+- queue_work(cache->wq, &ent->work);
++ if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
++ err = add_keys(dev, i, 1);
++ if (ent->cur < 2 * ent->limit) {
++ if (err == -EAGAIN) {
++ mlx5_ib_dbg(dev, "returned eagain, order %d\n",
++ i + 2);
++ queue_delayed_work(cache->wq, &ent->dwork,
++ msecs_to_jiffies(3));
++ } else if (err) {
++ mlx5_ib_warn(dev, "command failed order %d, err %d\n",
++ i + 2, err);
++ queue_delayed_work(cache->wq, &ent->dwork,
++ msecs_to_jiffies(1000));
++ } else {
++ queue_work(cache->wq, &ent->work);
++ }
++ }
+ } else if (ent->cur > 2 * ent->limit) {
+ if (!someone_adding(cache) &&
+- time_after(jiffies, cache->last_add + 60 * HZ)) {
++ time_after(jiffies, cache->last_add + 300 * HZ)) {
+ remove_keys(dev, i, 1);
+ if (ent->cur > ent->limit)
+ queue_work(cache->wq, &ent->work);
+ } else {
+- queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ);
++ queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
+ }
+ }
+ }
+@@ -336,18 +410,18 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
+
+ mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
+
+- spin_lock(&ent->lock);
++ spin_lock_irq(&ent->lock);
+ if (!list_empty(&ent->head)) {
+ mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
+ list);
+ list_del(&mr->list);
+ ent->cur--;
+- spin_unlock(&ent->lock);
++ spin_unlock_irq(&ent->lock);
+ if (ent->cur < ent->limit)
+ queue_work(cache->wq, &ent->work);
+ break;
+ }
+- spin_unlock(&ent->lock);
++ spin_unlock_irq(&ent->lock);
+
+ queue_work(cache->wq, &ent->work);
+
+@@ -374,12 +448,12 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+ return;
+ }
+ ent = &cache->ent[c];
+- spin_lock(&ent->lock);
++ spin_lock_irq(&ent->lock);
+ list_add_tail(&mr->list, &ent->head);
+ ent->cur++;
+ if (ent->cur > 2 * ent->limit)
+ shrink = 1;
+- spin_unlock(&ent->lock);
++ spin_unlock_irq(&ent->lock);
+
+ if (shrink)
+ queue_work(cache->wq, &ent->work);
+@@ -394,16 +468,16 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
+
+ cancel_delayed_work(&ent->dwork);
+ while (1) {
+- spin_lock(&ent->lock);
++ spin_lock_irq(&ent->lock);
+ if (list_empty(&ent->head)) {
+- spin_unlock(&ent->lock);
++ spin_unlock_irq(&ent->lock);
+ return;
+ }
+ mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+ list_del(&mr->list);
+ ent->cur--;
+ ent->size--;
+- spin_unlock(&ent->lock);
++ spin_unlock_irq(&ent->lock);
+ err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
+ if (err)
+ mlx5_ib_warn(dev, "failed destroy mkey\n");
+@@ -464,6 +538,13 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
+ debugfs_remove_recursive(dev->cache.root);
+ }
+
++static void delay_time_func(unsigned long ctx)
++{
++ struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
++
++ dev->fill_delay = 0;
++}
++
+ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
+ {
+ struct mlx5_mr_cache *cache = &dev->cache;
+@@ -479,6 +560,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
+ return -ENOMEM;
+ }
+
++ setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
+ for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+ INIT_LIST_HEAD(&cache->ent[i].head);
+ spin_lock_init(&cache->ent[i].lock);
+@@ -522,6 +604,7 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
+ clean_keys(dev, i);
+
+ destroy_workqueue(dev->cache.wq);
++ del_timer_sync(&dev->delay_timer);
+
+ return 0;
+ }
+@@ -551,7 +634,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
+ seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+ seg->start_addr = 0;
+
+- err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in));
++ err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
++ NULL);
+ if (err)
+ goto err_in;
+
+@@ -660,14 +744,14 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
+ int err;
+ int i;
+
+- for (i = 0; i < 10; i++) {
++ for (i = 0; i < 1; i++) {
+ mr = alloc_cached_mr(dev, order);
+ if (mr)
+ break;
+
+ err = add_keys(dev, order2idx(dev, order), 1);
+- if (err) {
+- mlx5_ib_warn(dev, "add_keys failed\n");
++ if (err && err != -EAGAIN) {
++ mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
+ break;
+ }
+ }
+@@ -759,8 +843,10 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
+ in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
+ in->seg.log2_page_size = page_shift;
+ in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+- in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
+- err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen);
++ in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
++ 1 << page_shift));
++ err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen, NULL,
++ NULL, NULL);
+ if (err) {
+ mlx5_ib_warn(dev, "create mkey failed\n");
+ goto err_2;
+@@ -944,7 +1030,8 @@ struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+ * TBD not needed - issue 197292 */
+ in->seg.log2_page_size = PAGE_SHIFT;
+
+- err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in));
++ err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
++ NULL, NULL);
+ kfree(in);
+ if (err)
+ goto err_free;
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -1744,6 +1744,7 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
+ MLX5_MKEY_MASK_PD |
+ MLX5_MKEY_MASK_LR |
+ MLX5_MKEY_MASK_LW |
++ MLX5_MKEY_MASK_KEY |
+ MLX5_MKEY_MASK_RR |
+ MLX5_MKEY_MASK_RW |
+ MLX5_MKEY_MASK_A |
+@@ -1800,7 +1801,8 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w
+ seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
+ seg->len = cpu_to_be64(wr->wr.fast_reg.length);
+ seg->log2_page_size = wr->wr.fast_reg.page_shift;
+- seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
++ seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
++ mlx5_mkey_variant(wr->wr.fast_reg.rkey));
+ }
+
+ static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+@@ -98,6 +98,7 @@ enum {
+ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
+ struct mlx5_cmd_msg *in,
+ struct mlx5_cmd_msg *out,
++ void *uout, int uout_size,
+ mlx5_cmd_cbk_t cbk,
+ void *context, int page_queue)
+ {
+@@ -110,6 +111,8 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
+
+ ent->in = in;
+ ent->out = out;
++ ent->uout = uout;
++ ent->uout_size = uout_size;
+ ent->callback = cbk;
+ ent->context = context;
+ ent->cmd = cmd;
+@@ -534,6 +537,7 @@ static void cmd_work_handler(struct work_struct *work)
+ ent->lay = lay;
+ memset(lay, 0, sizeof(*lay));
+ memcpy(lay->in, ent->in->first.data, sizeof(lay->in));
++ ent->op = be32_to_cpu(lay->in[0]) >> 16;
+ if (ent->in->next)
+ lay->in_ptr = cpu_to_be64(ent->in->next->dma);
+ lay->inlen = cpu_to_be32(ent->in->len);
+@@ -628,7 +632,8 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
+ * 2. page queue commands do not support asynchrous completion
+ */
+ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
+- struct mlx5_cmd_msg *out, mlx5_cmd_cbk_t callback,
++ struct mlx5_cmd_msg *out, void *uout, int uout_size,
++ mlx5_cmd_cbk_t callback,
+ void *context, int page_queue, u8 *status)
+ {
+ struct mlx5_cmd *cmd = &dev->cmd;
+@@ -642,7 +647,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
+ if (callback && page_queue)
+ return -EINVAL;
+
+- ent = alloc_cmd(cmd, in, out, callback, context, page_queue);
++ ent = alloc_cmd(cmd, in, out, uout, uout_size, callback, context,
++ page_queue);
+ if (IS_ERR(ent))
+ return PTR_ERR(ent);
+
+@@ -670,10 +676,10 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
+ op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
+ if (op < ARRAY_SIZE(cmd->stats)) {
+ stats = &cmd->stats[op];
+- spin_lock(&stats->lock);
++ spin_lock_irq(&stats->lock);
+ stats->sum += ds;
+ ++stats->n;
+- spin_unlock(&stats->lock);
++ spin_unlock_irq(&stats->lock);
+ }
+ mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
+ "fw exec time for %s is %lld nsec\n",
+@@ -826,7 +832,7 @@ static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
+ int n;
+ int i;
+
+- msg = kzalloc(sizeof(*msg), GFP_KERNEL);
++ msg = kzalloc(sizeof(*msg), flags);
+ if (!msg)
+ return ERR_PTR(-ENOMEM);
+
+@@ -1109,6 +1115,19 @@ void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
+ up(&cmd->sem);
+ }
+
++static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
++{
++ unsigned long flags;
++
++ if (msg->cache) {
++ spin_lock_irqsave(&msg->cache->lock, flags);
++ list_add_tail(&msg->list, &msg->cache->head);
++ spin_unlock_irqrestore(&msg->cache->lock, flags);
++ } else {
++ mlx5_free_cmd_msg(dev, msg);
++ }
++}
++
+ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
+ {
+ struct mlx5_cmd *cmd = &dev->cmd;
+@@ -1117,6 +1136,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
+ void *context;
+ int err;
+ int i;
++ ktime_t t1, t2, delta;
++ s64 ds;
++ struct mlx5_cmd_stats *stats;
++ unsigned long flags;
+
+ for (i = 0; i < (1 << cmd->log_sz); i++) {
+ if (test_bit(i, &vector)) {
+@@ -1141,9 +1164,29 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
+ }
+ free_ent(cmd, ent->idx);
+ if (ent->callback) {
++ t1 = timespec_to_ktime(ent->ts1);
++ t2 = timespec_to_ktime(ent->ts2);
++ delta = ktime_sub(t2, t1);
++ ds = ktime_to_ns(delta);
++ if (ent->op < ARRAY_SIZE(cmd->stats)) {
++ stats = &cmd->stats[ent->op];
++ spin_lock_irqsave(&stats->lock, flags);
++ stats->sum += ds;
++ ++stats->n;
++ spin_unlock_irqrestore(&stats->lock, flags);
++ }
++
+ callback = ent->callback;
+ context = ent->context;
+ err = ent->ret;
++ if (!err)
++ err = mlx5_copy_from_msg(ent->uout,
++ ent->out,
++ ent->uout_size);
++
++ mlx5_free_cmd_msg(dev, ent->out);
++ free_msg(dev, ent->in);
++
+ free_cmd(ent);
+ callback(err, context);
+ } else {
+@@ -1160,7 +1203,8 @@ static int status_to_err(u8 status)
+ return status ? -1 : 0; /* TBD more meaningful codes */
+ }
+
+-static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size)
++static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
++ gfp_t gfp)
+ {
+ struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM);
+ struct mlx5_cmd *cmd = &dev->cmd;
+@@ -1172,7 +1216,7 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size)
+ ent = &cmd->cache.med;
+
+ if (ent) {
+- spin_lock(&ent->lock);
++ spin_lock_irq(&ent->lock);
+ if (!list_empty(&ent->head)) {
+ msg = list_entry(ent->head.next, typeof(*msg), list);
+ /* For cached lists, we must explicitly state what is
+@@ -1181,43 +1225,34 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size)
+ msg->len = in_size;
+ list_del(&msg->list);
+ }
+- spin_unlock(&ent->lock);
++ spin_unlock_irq(&ent->lock);
+ }
+
+ if (IS_ERR(msg))
+- msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, in_size);
++ msg = mlx5_alloc_cmd_msg(dev, gfp, in_size);
+
+ return msg;
+ }
+
+-static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
+-{
+- if (msg->cache) {
+- spin_lock(&msg->cache->lock);
+- list_add_tail(&msg->list, &msg->cache->head);
+- spin_unlock(&msg->cache->lock);
+- } else {
+- mlx5_free_cmd_msg(dev, msg);
+- }
+-}
+-
+ static int is_manage_pages(struct mlx5_inbox_hdr *in)
+ {
+ return be16_to_cpu(in->opcode) == MLX5_CMD_OP_MANAGE_PAGES;
+ }
+
+-int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+- int out_size)
++static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
++ int out_size, mlx5_cmd_cbk_t callback, void *context)
+ {
+ struct mlx5_cmd_msg *inb;
+ struct mlx5_cmd_msg *outb;
+ int pages_queue;
++ gfp_t gfp;
+ int err;
+ u8 status = 0;
+
+ pages_queue = is_manage_pages(in);
++ gfp = callback ? GFP_ATOMIC : GFP_KERNEL;
+
+- inb = alloc_msg(dev, in_size);
++ inb = alloc_msg(dev, in_size, gfp);
+ if (IS_ERR(inb)) {
+ err = PTR_ERR(inb);
+ return err;
+@@ -1229,13 +1264,14 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+ goto out_in;
+ }
+
+- outb = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, out_size);
++ outb = mlx5_alloc_cmd_msg(dev, gfp, out_size);
+ if (IS_ERR(outb)) {
+ err = PTR_ERR(outb);
+ goto out_in;
+ }
+
+- err = mlx5_cmd_invoke(dev, inb, outb, NULL, NULL, pages_queue, &status);
++ err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
++ pages_queue, &status);
+ if (err)
+ goto out_out;
+
+@@ -1248,14 +1284,30 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+ err = mlx5_copy_from_msg(out, outb, out_size);
+
+ out_out:
+- mlx5_free_cmd_msg(dev, outb);
++ if (!callback)
++ mlx5_free_cmd_msg(dev, outb);
+
+ out_in:
+- free_msg(dev, inb);
++ if (!callback)
++ free_msg(dev, inb);
+ return err;
+ }
++
++int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
++ int out_size)
++{
++ return cmd_exec(dev, in, in_size, out, out_size, NULL, NULL);
++}
+ EXPORT_SYMBOL(mlx5_cmd_exec);
+
++int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
++ void *out, int out_size, mlx5_cmd_cbk_t callback,
++ void *context)
++{
++ return cmd_exec(dev, in, in_size, out, out_size, callback, context);
++}
++EXPORT_SYMBOL(mlx5_cmd_exec_cb);
++
+ static void destroy_msg_cache(struct mlx5_core_dev *dev)
+ {
+ struct mlx5_cmd *cmd = &dev->cmd;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+@@ -154,10 +154,10 @@ static ssize_t average_read(struct file *filp, char __user *buf, size_t count,
+ return 0;
+
+ stats = filp->private_data;
+- spin_lock(&stats->lock);
++ spin_lock_irq(&stats->lock);
+ if (stats->n)
+ field = div64_u64(stats->sum, stats->n);
+- spin_unlock(&stats->lock);
++ spin_unlock_irq(&stats->lock);
+ ret = snprintf(tbuf, sizeof(tbuf), "%llu\n", field);
+ if (ret > 0) {
+ if (copy_to_user(buf, tbuf, ret))
+@@ -175,10 +175,10 @@ static ssize_t average_write(struct file *filp, const char __user *buf,
+ struct mlx5_cmd_stats *stats;
+
+ stats = filp->private_data;
+- spin_lock(&stats->lock);
++ spin_lock_irq(&stats->lock);
+ stats->sum = 0;
+ stats->n = 0;
+- spin_unlock(&stats->lock);
++ spin_unlock_irq(&stats->lock);
+
+ *pos += count;
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+@@ -37,31 +37,41 @@
+ #include "mlx5_core.h"
+
+ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
+- struct mlx5_create_mkey_mbox_in *in, int inlen)
++ struct mlx5_create_mkey_mbox_in *in, int inlen,
++ mlx5_cmd_cbk_t callback, void *context,
++ struct mlx5_create_mkey_mbox_out *out)
+ {
+- struct mlx5_create_mkey_mbox_out out;
++ struct mlx5_create_mkey_mbox_out lout;
+ int err;
+ u8 key;
+
+- memset(&out, 0, sizeof(out));
+- spin_lock(&dev->priv.mkey_lock);
++ memset(&lout, 0, sizeof(lout));
++ spin_lock_irq(&dev->priv.mkey_lock);
+ key = dev->priv.mkey_key++;
+- spin_unlock(&dev->priv.mkey_lock);
++ spin_unlock_irq(&dev->priv.mkey_lock);
+ in->seg.qpn_mkey7_0 |= cpu_to_be32(key);
+ in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_MKEY);
+- err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
++ if (callback) {
++ err = mlx5_cmd_exec_cb(dev, in, inlen, out, sizeof(*out),
++ callback, context);
++ return err;
++ } else {
++ err = mlx5_cmd_exec(dev, in, inlen, &lout, sizeof(lout));
++ }
++
+ if (err) {
+ mlx5_core_dbg(dev, "cmd exec faile %d\n", err);
+ return err;
+ }
+
+- if (out.hdr.status) {
+- mlx5_core_dbg(dev, "status %d\n", out.hdr.status);
+- return mlx5_cmd_status_to_err(&out.hdr);
++ if (lout.hdr.status) {
++ mlx5_core_dbg(dev, "status %d\n", lout.hdr.status);
++ return mlx5_cmd_status_to_err(&lout.hdr);
+ }
+
+- mr->key = mlx5_idx_to_mkey(be32_to_cpu(out.mkey) & 0xffffff) | key;
+- mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", be32_to_cpu(out.mkey), key, mr->key);
++ mr->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key;
++ mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
++ be32_to_cpu(lout.mkey), key, mr->key);
+
+ return err;
+ }
+diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/driver.h
++++ b/include/linux/mlx5/driver.h
+@@ -557,9 +557,11 @@ typedef void (*mlx5_cmd_cbk_t)(int status, void *context);
+ struct mlx5_cmd_work_ent {
+ struct mlx5_cmd_msg *in;
+ struct mlx5_cmd_msg *out;
++ void *uout;
++ int uout_size;
+ mlx5_cmd_cbk_t callback;
+ void *context;
+- int idx;
++ int idx;
+ struct completion done;
+ struct mlx5_cmd *cmd;
+ struct work_struct work;
+@@ -570,6 +572,7 @@ struct mlx5_cmd_work_ent {
+ u8 token;
+ struct timespec ts1;
+ struct timespec ts2;
++ u16 op;
+ };
+
+ struct mlx5_pas {
+@@ -653,6 +656,9 @@ void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
+ int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr);
+ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+ int out_size);
++int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
++ void *out, int out_size, mlx5_cmd_cbk_t callback,
++ void *context);
+ int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
+ int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
+ int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
+@@ -676,7 +682,9 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ u16 lwm, int is_srq);
+ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
+- struct mlx5_create_mkey_mbox_in *in, int inlen);
++ struct mlx5_create_mkey_mbox_in *in, int inlen,
++ mlx5_cmd_cbk_t callback, void *context,
++ struct mlx5_create_mkey_mbox_out *out);
+ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr);
+ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
+ struct mlx5_query_mkey_mbox_out *out, int outlen);
+@@ -745,6 +753,11 @@ static inline u32 mlx5_idx_to_mkey(u32 mkey_idx)
+ return mkey_idx << 8;
+ }
+
++static inline u8 mlx5_mkey_variant(u32 mkey)
++{
++ return mkey & 0xff;
++}
++
+ enum {
+ MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0,
+ MLX5_PROF_MASK_MR_CACHE = (u64)1 << 1,
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Fix overflow check in IB_WR_FAST_REG_MR
+
+Make sure not to overflow when reading the page list from struct
+ib_fast_reg_page_list.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/qp.c | 4 ++++
+ 1 files changed, 4 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -1915,6 +1915,10 @@ static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
+ if (unlikely((*seg == qp->sq.qend)))
+ *seg = mlx5_get_send_wqe(qp, 0);
+ if (!li) {
++ if (unlikely(wr->wr.fast_reg.page_list_len >
++ wr->wr.fast_reg.page_list->max_page_list_len))
++ return -ENOMEM;
++
+ set_frwr_pages(*seg, wr, mdev, pd, writ);
+ *seg += sizeof(struct mlx5_wqe_data_seg);
+ *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Simplify mlx5_ib_destroy_srq
+
+Make use of destroy_srq_kernel() to clear SRQ resources.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/srq.c | 4 +---
+ 1 files changed, 1 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/srq.c
++++ b/drivers/infiniband/hw/mlx5/srq.c
+@@ -390,9 +390,7 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq)
+ mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
+ ib_umem_release(msrq->umem);
+ } else {
+- kfree(msrq->wrid);
+- mlx5_buf_free(&dev->mdev, &msrq->buf);
+- mlx5_db_free(&dev->mdev, &msrq->db);
++ destroy_srq_kernel(dev, msrq);
+ }
+
+ kfree(srq);
--- /dev/null
+From: Moshe Lazer <moshel@mellanox.com>
+Subject: [PATCH] IB/mlx5: Fix srq free in destroy qp
+
+On destroy QP the driver walks over the relevant CQ and removes CQEs
+reported for the destroyed QP. It also frees the related SRQ entry
+without checking that this is actually an SRQ-related CQE. In case of
+a CQ used for both send and receive QP, we could free SRQ entries for
+send CQEs. This patch resolves this issue by verifying that this is an
+SRQ-related CQE by checking that the SRQ number in the CQE is not zero.
+
+Signed-off-by: Moshe Lazer <moshel@mellanox.com>
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/cq.c | 16 ++++------------
+ 1 files changed, 4 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/cq.c
++++ b/drivers/infiniband/hw/mlx5/cq.c
+@@ -750,17 +750,9 @@ int mlx5_ib_destroy_cq(struct ib_cq *cq)
+ return 0;
+ }
+
+-static int is_equal_rsn(struct mlx5_cqe64 *cqe64, struct mlx5_ib_srq *srq,
+- u32 rsn)
++static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
+ {
+- u32 lrsn;
+-
+- if (srq)
+- lrsn = be32_to_cpu(cqe64->srqn) & 0xffffff;
+- else
+- lrsn = be32_to_cpu(cqe64->sop_drop_qpn) & 0xffffff;
+-
+- return rsn == lrsn;
++ return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
+ }
+
+ void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
+@@ -790,8 +782,8 @@ void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
+ while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
+ cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
+ cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
+- if (is_equal_rsn(cqe64, srq, rsn)) {
+- if (srq)
++ if (is_equal_rsn(cqe64, rsn)) {
++ if (srq && (ntohl(cqe64->srqn) & 0xffffff))
+ mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
+ ++nfreed;
+ } else if (nfreed) {
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] mlx5: Fix cleanup flow when DMA mapping fails
+
+If DMA mapping fails, the driver cleared the object that holds the
+previously DMA mapped pages. Fix this by allocating a new object for
+the command that reports back to firmware that pages can't be
+supplied.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ .../net/ethernet/mellanox/mlx5/core/pagealloc.c | 18 +++++++++++++-----
+ 1 files changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+@@ -181,6 +181,7 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
+ {
+ struct mlx5_manage_pages_inbox *in;
+ struct mlx5_manage_pages_outbox out;
++ struct mlx5_manage_pages_inbox *nin;
+ struct page *page;
+ int inlen;
+ u64 addr;
+@@ -247,13 +248,20 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
+
+ out_alloc:
+ if (notify_fail) {
+- memset(in, 0, inlen);
++ nin = kzalloc(sizeof(*nin), GFP_KERNEL);
++ if (!nin) {
++ mlx5_core_warn(dev, "allocation failed\n");
++ goto unmap;
++ }
+ memset(&out, 0, sizeof(out));
+- in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
+- in->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE);
+- if (mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out)))
+- mlx5_core_warn(dev, "\n");
++ nin->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
++ nin->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE);
++ if (mlx5_cmd_exec(dev, nin, sizeof(*nin), &out, sizeof(out)))
++ mlx5_core_warn(dev, "page notify failed\n");
++ kfree(nin);
+ }
++
++unmap:
+ for (i--; i >= 0; i--) {
+ addr = be64_to_cpu(in->pas[i]);
+ page = remove_page(dev, addr);
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] mlx5: Support communicating arbitrary host page size to firmware
+
+Connect-IB firmware requires 4K pages to be communicated with the
+driver. This patch breaks larger pages to 4K units to enable support
+for architectures utilizing larger page size, such as PowerPC. This
+patch also fixes several places that referred to PAGE_SHIFT instead of
+explicit 12 which is the inherent page shift on Connect-IB.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/cq.c | 2 +-
+ drivers/infiniband/hw/mlx5/qp.c | 4 +-
+ drivers/infiniband/hw/mlx5/srq.c | 4 +-
+ drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +-
+ .../net/ethernet/mellanox/mlx5/core/pagealloc.c | 174 ++++++++++++++------
+ include/linux/mlx5/driver.h | 1 +
+ 6 files changed, 127 insertions(+), 60 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/cq.c
++++ b/drivers/infiniband/hw/mlx5/cq.c
+@@ -620,7 +620,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+ }
+ mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
+
+- (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT;
++ (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - 12;
+ *index = dev->mdev.priv.uuari.uars[0].index;
+
+ return 0;
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -551,7 +551,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ }
+ mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
+ (*in)->ctx.log_pg_sz_remote_qpn =
+- cpu_to_be32((page_shift - PAGE_SHIFT) << 24);
++ cpu_to_be32((page_shift - 12) << 24);
+ (*in)->ctx.params2 = cpu_to_be32(offset << 6);
+
+ (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
+@@ -648,7 +648,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
+ goto err_buf;
+ }
+ (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
+- (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - PAGE_SHIFT) << 24);
++ (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - 12) << 24);
+ /* Set "fast registration enabled" for all kernel QPs */
+ (*in)->ctx.params1 |= cpu_to_be32(1 << 11);
+ (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
+diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/srq.c
++++ b/drivers/infiniband/hw/mlx5/srq.c
+@@ -123,7 +123,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
+ goto err_in;
+ }
+
+- (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
++ (*in)->ctx.log_pg_sz = page_shift - 12;
+ (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
+
+ return 0;
+@@ -192,7 +192,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
+ }
+ srq->wq_sig = !!srq_signature;
+
+- (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
++ (*in)->ctx.log_pg_sz = page_shift - 12;
+
+ return 0;
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+@@ -354,7 +354,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
+ in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ);
+ in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index);
+ in->ctx.intr = vecidx;
+- in->ctx.log_page_size = PAGE_SHIFT - 12;
++ in->ctx.log_page_size = eq->buf.page_shift - 12;
+ in->events_mask = cpu_to_be64(mask);
+
+ err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+@@ -57,10 +57,13 @@ struct mlx5_pages_req {
+ };
+
+ struct fw_page {
+- struct rb_node rb_node;
+- u64 addr;
+- struct page *page;
+- u16 func_id;
++ struct rb_node rb_node;
++ u64 addr;
++ struct page *page;
++ u16 func_id;
++ unsigned long bitmask;
++ struct list_head list;
++ unsigned free_count;
+ };
+
+ struct mlx5_query_pages_inbox {
+@@ -94,6 +97,11 @@ enum {
+ MAX_RECLAIM_TIME_MSECS = 5000,
+ };
+
++enum {
++ MLX5_MAX_RECLAIM_TIME_MILI = 5000,
++ MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / 4096,
++};
++
+ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
+ {
+ struct rb_root *root = &dev->priv.page_root;
+@@ -101,6 +109,7 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u
+ struct rb_node *parent = NULL;
+ struct fw_page *nfp;
+ struct fw_page *tfp;
++ int i;
+
+ while (*new) {
+ parent = *new;
+@@ -113,25 +122,29 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u
+ return -EEXIST;
+ }
+
+- nfp = kmalloc(sizeof(*nfp), GFP_KERNEL);
++ nfp = kzalloc(sizeof(*nfp), GFP_KERNEL);
+ if (!nfp)
+ return -ENOMEM;
+
+ nfp->addr = addr;
+ nfp->page = page;
+ nfp->func_id = func_id;
++ nfp->free_count = MLX5_NUM_4K_IN_PAGE;
++ for (i = 0; i < MLX5_NUM_4K_IN_PAGE; i++)
++ set_bit(i, &nfp->bitmask);
+
+ rb_link_node(&nfp->rb_node, parent, new);
+ rb_insert_color(&nfp->rb_node, root);
++ list_add(&nfp->list, &dev->priv.free_list);
+
+ return 0;
+ }
+
+-static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
++static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr)
+ {
+ struct rb_root *root = &dev->priv.page_root;
+ struct rb_node *tmp = root->rb_node;
+- struct page *result = NULL;
++ struct fw_page *result = NULL;
+ struct fw_page *tfp;
+
+ while (tmp) {
+@@ -141,9 +154,7 @@ static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
+ } else if (tfp->addr > addr) {
+ tmp = tmp->rb_right;
+ } else {
+- rb_erase(&tfp->rb_node, root);
+- result = tfp->page;
+- kfree(tfp);
++ result = tfp;
+ break;
+ }
+ }
+@@ -176,13 +187,97 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
+ return err;
+ }
+
++static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
++{
++ struct fw_page *fp;
++ unsigned n;
++
++ if (list_empty(&dev->priv.free_list)) {
++ return -ENOMEM;
++ mlx5_core_warn(dev, "\n");
++ }
++
++ fp = list_entry(dev->priv.free_list.next, struct fw_page, list);
++ n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
++ if (n >= MLX5_NUM_4K_IN_PAGE) {
++ mlx5_core_warn(dev, "alloc 4k bug\n");
++ return -ENOENT;
++ }
++ clear_bit(n, &fp->bitmask);
++ fp->free_count--;
++ if (!fp->free_count)
++ list_del(&fp->list);
++
++ *addr = fp->addr + n * 4096;
++
++ return 0;
++}
++
++static void free_4k(struct mlx5_core_dev *dev, u64 addr)
++{
++ struct fw_page *fwp;
++ int n;
++
++ fwp = find_fw_page(dev, addr & PAGE_MASK);
++ if (!fwp) {
++ mlx5_core_warn(dev, "page not found\n");
++ return;
++ }
++
++ n = (addr & ~PAGE_MASK) % 4096;
++ fwp->free_count++;
++ set_bit(n, &fwp->bitmask);
++ if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
++ rb_erase(&fwp->rb_node, &dev->priv.page_root);
++ list_del(&fwp->list);
++ dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
++ __free_page(fwp->page);
++ kfree(fwp);
++ } else if (fwp->free_count == 1) {
++ list_add(&fwp->list, &dev->priv.free_list);
++ }
++}
++
++static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id)
++{
++ struct page *page;
++ u64 addr;
++ int err;
++
++ page = alloc_page(GFP_HIGHUSER);
++ if (!page) {
++ mlx5_core_warn(dev, "failed to allocate page\n");
++ return -ENOMEM;
++ }
++ addr = dma_map_page(&dev->pdev->dev, page, 0,
++ PAGE_SIZE, DMA_BIDIRECTIONAL);
++ if (dma_mapping_error(&dev->pdev->dev, addr)) {
++ mlx5_core_warn(dev, "failed dma mapping page\n");
++ err = -ENOMEM;
++ goto out_alloc;
++ }
++ err = insert_page(dev, addr, page, func_id);
++ if (err) {
++ mlx5_core_err(dev, "failed to track allocated page\n");
++ goto out_mapping;
++ }
++
++ return 0;
++
++out_mapping:
++ dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
++
++out_alloc:
++ __free_page(page);
++
++ return err;
++}
+ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
+ int notify_fail)
+ {
+ struct mlx5_manage_pages_inbox *in;
+ struct mlx5_manage_pages_outbox out;
+ struct mlx5_manage_pages_inbox *nin;
+- struct page *page;
+ int inlen;
+ u64 addr;
+ int err;
+@@ -197,27 +292,15 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
+ memset(&out, 0, sizeof(out));
+
+ for (i = 0; i < npages; i++) {
+- page = alloc_page(GFP_HIGHUSER);
+- if (!page) {
+- err = -ENOMEM;
+- mlx5_core_warn(dev, "failed to allocate page\n");
+- goto out_alloc;
+- }
+- addr = dma_map_page(&dev->pdev->dev, page, 0,
+- PAGE_SIZE, DMA_BIDIRECTIONAL);
+- if (dma_mapping_error(&dev->pdev->dev, addr)) {
+- mlx5_core_warn(dev, "failed dma mapping page\n");
+- __free_page(page);
+- err = -ENOMEM;
+- goto out_alloc;
+- }
+- err = insert_page(dev, addr, page, func_id);
++retry:
++ err = alloc_4k(dev, &addr);
+ if (err) {
+- mlx5_core_err(dev, "failed to track allocated page\n");
+- dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+- __free_page(page);
+- err = -ENOMEM;
+- goto out_alloc;
++ if (err == -ENOMEM)
++ err = alloc_system_page(dev, func_id);
++ if (err)
++ goto out_4k;
++
++ goto retry;
+ }
+ in->pas[i] = cpu_to_be64(addr);
+ }
+@@ -227,7 +310,6 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
+ in->func_id = cpu_to_be16(func_id);
+ in->num_entries = cpu_to_be32(npages);
+ err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+- mlx5_core_dbg(dev, "err %d\n", err);
+ if (err) {
+ mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err);
+ goto out_alloc;
+@@ -251,7 +333,7 @@ out_alloc:
+ nin = kzalloc(sizeof(*nin), GFP_KERNEL);
+ if (!nin) {
+ mlx5_core_warn(dev, "allocation failed\n");
+- goto unmap;
++ goto out_4k;
+ }
+ memset(&out, 0, sizeof(out));
+ nin->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
+@@ -261,19 +343,9 @@ out_alloc:
+ kfree(nin);
+ }
+
+-unmap:
+- for (i--; i >= 0; i--) {
+- addr = be64_to_cpu(in->pas[i]);
+- page = remove_page(dev, addr);
+- if (!page) {
+- mlx5_core_err(dev, "BUG: can't remove page at addr 0x%llx\n",
+- addr);
+- continue;
+- }
+- dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+- __free_page(page);
+- }
+-
++out_4k:
++ for (i--; i >= 0; i--)
++ free_4k(dev, be64_to_cpu(in->pas[i]));
+ out_free:
+ mlx5_vfree(in);
+ return err;
+@@ -284,7 +356,6 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
+ {
+ struct mlx5_manage_pages_inbox in;
+ struct mlx5_manage_pages_outbox *out;
+- struct page *page;
+ int num_claimed;
+ int outlen;
+ u64 addr;
+@@ -323,13 +394,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
+
+ for (i = 0; i < num_claimed; i++) {
+ addr = be64_to_cpu(out->pas[i]);
+- page = remove_page(dev, addr);
+- if (!page) {
+- mlx5_core_warn(dev, "FW reported unknown DMA address 0x%llx\n", addr);
+- } else {
+- dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+- __free_page(page);
+- }
++ free_4k(dev, addr);
+ }
+
+ out_free:
+@@ -435,6 +500,7 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
+ void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
+ {
+ dev->priv.page_root = RB_ROOT;
++ INIT_LIST_HEAD(&dev->priv.free_list);
+ }
+
+ void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
+diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/driver.h
++++ b/include/linux/mlx5/driver.h
+@@ -483,6 +483,7 @@ struct mlx5_priv {
+ struct rb_root page_root;
+ int fw_pages;
+ int reg_pages;
++ struct list_head free_list;
+
+ struct mlx5_core_health health;
+
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] mlx5: Clear reserved area in set_hca_cap()
+
+Firmware spec requires reserved fields to be cleared when calling
+set_hca_cap. Current code queries and copy to the set area, possibly
+resulting in reserved bits not cleared. This patch copies only
+writable fields to the set area.
+
+Fix also typo - msx => max
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c | 35 +++++++++++++++++++++--
+ include/linux/mlx5/device.h | 9 +++++-
+ 2 files changed, 39 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -159,6 +159,36 @@ struct mlx5_reg_host_endianess {
+ u8 rsvd[15];
+ };
+
++
++#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))
++
++enum {
++ MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
++ CAP_MASK(MLX5_CAP_OFF_DCT, 1),
++};
++
++/* selectively copy writable fields clearing any reserved area
++ */
++static void copy_rw_fields(struct mlx5_hca_cap *to, struct mlx5_hca_cap *from)
++{
++ u64 v64;
++
++ to->log_max_qp = from->log_max_qp & 0x1f;
++ to->log_max_ra_req_dc = from->log_max_ra_req_dc & 0x3f;
++ to->log_max_ra_res_dc = from->log_max_ra_res_dc & 0x3f;
++ to->log_max_ra_req_qp = from->log_max_ra_req_qp & 0x3f;
++ to->log_max_ra_res_qp = from->log_max_ra_res_qp & 0x3f;
++ to->log_max_atomic_size_qp = from->log_max_atomic_size_qp;
++ to->log_max_atomic_size_dc = from->log_max_atomic_size_dc;
++ v64 = be64_to_cpu(from->flags) & MLX5_CAP_BITS_RW_MASK;
++ to->flags = cpu_to_be64(v64);
++}
++
++enum {
++ HCA_CAP_OPMOD_GET_MAX = 0,
++ HCA_CAP_OPMOD_GET_CUR = 1,
++};
++
+ static int handle_hca_cap(struct mlx5_core_dev *dev)
+ {
+ struct mlx5_cmd_query_hca_cap_mbox_out *query_out = NULL;
+@@ -180,7 +210,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
+ }
+
+ query_ctx.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP);
+- query_ctx.hdr.opmod = cpu_to_be16(0x1);
++ query_ctx.hdr.opmod = cpu_to_be16(HCA_CAP_OPMOD_GET_CUR);
+ err = mlx5_cmd_exec(dev, &query_ctx, sizeof(query_ctx),
+ query_out, sizeof(*query_out));
+ if (err)
+@@ -192,8 +222,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
+ goto query_ex;
+ }
+
+- memcpy(&set_ctx->hca_cap, &query_out->hca_cap,
+- sizeof(set_ctx->hca_cap));
++ copy_rw_fields(&set_ctx->hca_cap, &query_out->hca_cap);
+
+ if (dev->profile->mask & MLX5_PROF_MASK_QP_SIZE)
+ set_ctx->hca_cap.log_max_qp = dev->profile->log_max_qp;
+diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/device.h
++++ b/include/linux/mlx5/device.h
+@@ -230,6 +230,11 @@ enum {
+ MLX5_MAX_PAGE_SHIFT = 31
+ };
+
++enum {
++ MLX5_CAP_OFF_DCT = 41,
++ MLX5_CAP_OFF_CMDIF_CSUM = 46,
++};
++
+ struct mlx5_inbox_hdr {
+ __be16 opcode;
+ u8 rsvd[4];
+@@ -319,9 +324,9 @@ struct mlx5_hca_cap {
+ u8 rsvd25[42];
+ __be16 log_uar_page_sz;
+ u8 rsvd26[28];
+- u8 log_msx_atomic_size_qp;
++ u8 log_max_atomic_size_qp;
+ u8 rsvd27[2];
+- u8 log_msx_atomic_size_dc;
++ u8 log_max_atomic_size_dc;
+ u8 rsvd28[76];
+ };
+
--- /dev/null
+From: Moshe Lazer <moshel@mellanox.com>
+Subject: [PATCH] mlx5_core: Change optimal_reclaimed_pages for better performance
+
+Change optimal_reclaimed_pages() to increase the output size of each
+reclaim pages command. This change reduces significantly the amount of
+reclaim pages commands issued to FW when the driver is unloaded which
+reduces the overall driver unload time.
+
+Signed-off-by: Moshe Lazer <moshel@mellanox.com>
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ .../net/ethernet/mellanox/mlx5/core/pagealloc.c | 9 +++++++--
+ 1 files changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+@@ -454,14 +454,19 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
+ return give_pages(dev, func_id, npages, 0);
+ }
+
++enum {
++ MLX5_BLKS_FOR_RECLAIM_PAGES = 12
++};
++
+ static int optimal_reclaimed_pages(void)
+ {
+ struct mlx5_cmd_prot_block *block;
+ struct mlx5_cmd_layout *lay;
+ int ret;
+
+- ret = (sizeof(lay->in) + sizeof(block->data) -
+- sizeof(struct mlx5_manage_pages_outbox)) / 8;
++ ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) -
++ sizeof(struct mlx5_manage_pages_outbox)) /
++ FIELD_SIZEOF(struct mlx5_manage_pages_outbox, pas[0]);
+
+ return ret;
+ }
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Remove dead code in mr.c
+
+In mlx5_mr_cache_init() the size variable is not used so remove it to
+avoid compiler warnings when running with make W=1.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/mr.c | 10 +++-------
+ 1 files changed, 3 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -40,7 +40,6 @@
+ #include "mlx5_ib.h"
+
+ enum {
+- DEF_CACHE_SIZE = 10,
+ MAX_PENDING_REG_MR = 8,
+ };
+
+@@ -550,7 +549,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
+ struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_cache_ent *ent;
+ int limit;
+- int size;
+ int err;
+ int i;
+
+@@ -571,13 +569,11 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
+ ent->order = i + 2;
+ ent->dev = dev;
+
+- if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) {
+- size = dev->mdev.profile->mr_cache[i].size;
++ if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE)
+ limit = dev->mdev.profile->mr_cache[i].limit;
+- } else {
+- size = DEF_CACHE_SIZE;
++ else
+ limit = 0;
+- }
++
+ INIT_WORK(&ent->work, cache_work_func);
+ INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
+ ent->limit = limit;
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Remove "Always false" comparison
+
+mlx5_cur and mlx5_new cannot have negative values so remove the
+redundant condition.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/qp.c | 2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -1550,7 +1550,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
+ mlx5_cur = to_mlx5_state(cur_state);
+ mlx5_new = to_mlx5_state(new_state);
+ mlx5_st = to_mlx5_st(ibqp->qp_type);
+- if (mlx5_cur < 0 || mlx5_new < 0 || mlx5_st < 0)
++ if (mlx5_st < 0)
+ goto out;
+
+ optpar = ib_mask_to_mlx5_opt(attr_mask);
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Update opt param mask for RTS2RTS
+
+RTS to RTS transition should allow update of alternate path.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/qp.c | 6 ++++--
+ 1 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -1317,9 +1317,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
+ MLX5_QP_OPTPAR_RAE |
+ MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_RNR_TIMEOUT |
+- MLX5_QP_OPTPAR_PM_STATE,
++ MLX5_QP_OPTPAR_PM_STATE |
++ MLX5_QP_OPTPAR_ALT_ADDR_PATH,
+ [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE |
+- MLX5_QP_OPTPAR_PM_STATE,
++ MLX5_QP_OPTPAR_PM_STATE |
++ MLX5_QP_OPTPAR_ALT_ADDR_PATH,
+ [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY |
+ MLX5_QP_OPTPAR_SRQN |
+ MLX5_QP_OPTPAR_CQN_RCV,
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] mlx5: Use enum to indicate adapter page size
+
+The Connect-IB adapter has an inherent page size which equals 4K.
+Define an new enum that equals the page shift and use it instead of
+using the value 12 throughout the code.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/cq.c | 2 +-
+ drivers/infiniband/hw/mlx5/qp.c | 5 +++--
+ drivers/infiniband/hw/mlx5/srq.c | 4 ++--
+ drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +-
+ include/linux/mlx5/device.h | 4 ++++
+ 5 files changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/cq.c
++++ b/drivers/infiniband/hw/mlx5/cq.c
+@@ -620,7 +620,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+ }
+ mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
+
+- (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - 12;
++ (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ *index = dev->mdev.priv.uuari.uars[0].index;
+
+ return 0;
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -551,7 +551,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ }
+ mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
+ (*in)->ctx.log_pg_sz_remote_qpn =
+- cpu_to_be32((page_shift - 12) << 24);
++ cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
+ (*in)->ctx.params2 = cpu_to_be32(offset << 6);
+
+ (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
+@@ -648,7 +648,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
+ goto err_buf;
+ }
+ (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
+- (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - 12) << 24);
++ (*in)->ctx.log_pg_sz_remote_qpn =
++ cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
+ /* Set "fast registration enabled" for all kernel QPs */
+ (*in)->ctx.params1 |= cpu_to_be32(1 << 11);
+ (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
+diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/srq.c
++++ b/drivers/infiniband/hw/mlx5/srq.c
+@@ -123,7 +123,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
+ goto err_in;
+ }
+
+- (*in)->ctx.log_pg_sz = page_shift - 12;
++ (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
+
+ return 0;
+@@ -192,7 +192,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
+ }
+ srq->wq_sig = !!srq_signature;
+
+- (*in)->ctx.log_pg_sz = page_shift - 12;
++ (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+
+ return 0;
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+@@ -354,7 +354,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
+ in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ);
+ in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index);
+ in->ctx.intr = vecidx;
+- in->ctx.log_page_size = eq->buf.page_shift - 12;
++ in->ctx.log_page_size = eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ in->events_mask = cpu_to_be64(mask);
+
+ err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/device.h
++++ b/include/linux/mlx5/device.h
+@@ -231,6 +231,10 @@ enum {
+ };
+
+ enum {
++ MLX5_ADAPTER_PAGE_SHIFT = 12
++};
++
++enum {
+ MLX5_CAP_OFF_DCT = 41,
+ MLX5_CAP_OFF_CMDIF_CSUM = 46,
+ };
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Remove dead code
+
+The value of the local variable index is never used in reg_mr_callback().
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+
+[ Remove now-unused variable delta too. - Roland ]
+
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/mr.c | 6 ------
+ 1 files changed, 0 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -72,14 +72,8 @@ static void reg_mr_callback(int status, void *context)
+ int c = order2idx(dev, mr->order);
+ struct mlx5_cache_ent *ent = &cache->ent[c];
+ u8 key;
+- unsigned long delta = jiffies - mr->start;
+- unsigned long index;
+ unsigned long flags;
+
+- index = find_last_bit(&delta, 8 * sizeof(delta));
+- if (index == 64)
+- index = 0;
+-
+ spin_lock_irqsave(&ent->lock, flags);
+ ent->pending--;
+ spin_unlock_irqrestore(&ent->lock, flags);
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Fix list_del of empty list
+
+For archs with page size of 4K, when the chunk is freed, fwp is not in the
+list so avoid attempting to delete it.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ .../net/ethernet/mellanox/mlx5/core/pagealloc.c | 3 ++-
+ 1 files changed, 2 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+@@ -229,7 +229,8 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr)
+ set_bit(n, &fwp->bitmask);
+ if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
+ rb_erase(&fwp->rb_node, &dev->priv.page_root);
+- list_del(&fwp->list);
++ if (fwp->free_count != 1)
++ list_del(&fwp->list);
+ dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ __free_page(fwp->page);
+ kfree(fwp);
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Fix page shift in create CQ for userspace
+
+When creating a CQ, we must use mlx5 adapter page shift.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/cq.c | 2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/cq.c
++++ b/drivers/infiniband/hw/mlx5/cq.c
+@@ -556,7 +556,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
+ goto err_db;
+ }
+ mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
+- (*cqb)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
++ (*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+
+ *index = to_mucontext(context)->uuari.uars[0].index;
+
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Remove unused code in mr.c
+
+The variable start in struct mlx5_ib_mr is never used. Remove it.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 -
+ drivers/infiniband/hw/mlx5/mr.c | 1 -
+ 2 files changed, 0 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+@@ -264,7 +264,6 @@ struct mlx5_ib_mr {
+ enum ib_wc_status status;
+ struct mlx5_ib_dev *dev;
+ struct mlx5_create_mkey_mbox_out out;
+- unsigned long start;
+ };
+
+ struct mlx5_ib_fast_reg_page_list {
+diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -146,7 +146,6 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
+ spin_lock_irq(&ent->lock);
+ ent->pending++;
+ spin_unlock_irq(&ent->lock);
+- mr->start = jiffies;
+ err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
+ sizeof(*in), reg_mr_callback,
+ mr, &mr->out);
--- /dev/null
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Subject: [PATCH] mlx5_core: Remove dead code
+
+Remove leftover of debug code.
+
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ .../net/ethernet/mellanox/mlx5/core/pagealloc.c | 4 +---
+ 1 files changed, 1 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+@@ -192,10 +192,8 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
+ struct fw_page *fp;
+ unsigned n;
+
+- if (list_empty(&dev->priv.free_list)) {
++ if (list_empty(&dev->priv.free_list))
+ return -ENOMEM;
+- mlx5_core_warn(dev, "\n");
+- }
+
+ fp = list_entry(dev->priv.free_list.next, struct fw_page, list);
+ n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Fix micro UAR allocator
+
+The micro UAR (uuar) allocator had a bug which resulted from the fact
+that in each UAR we only have two micro UARs available, those at
+index 0 and 1. This patch defines iterators to aid in traversing the
+list of available micro UARs when allocating a uuar.
+
+In addition, change the logic in create_user_qp() so that if high
+class allocation fails (high class means lower latency), we revert to
+medium class and not to the low class.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/main.c | 13 ++++--
+ drivers/infiniband/hw/mlx5/qp.c | 77 ++++++++++++++++++++++++++++---------
+ include/linux/mlx5/device.h | 7 ++-
+ 3 files changed, 70 insertions(+), 27 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/main.c
++++ b/drivers/infiniband/hw/mlx5/main.c
+@@ -541,6 +541,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
+ struct mlx5_ib_ucontext *context;
+ struct mlx5_uuar_info *uuari;
+ struct mlx5_uar *uars;
++ int gross_uuars;
+ int num_uars;
+ int uuarn;
+ int err;
+@@ -559,11 +560,13 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
+ if (req.total_num_uuars == 0)
+ return ERR_PTR(-EINVAL);
+
+- req.total_num_uuars = ALIGN(req.total_num_uuars, MLX5_BF_REGS_PER_PAGE);
++ req.total_num_uuars = ALIGN(req.total_num_uuars,
++ MLX5_NON_FP_BF_REGS_PER_PAGE);
+ if (req.num_low_latency_uuars > req.total_num_uuars - 1)
+ return ERR_PTR(-EINVAL);
+
+- num_uars = req.total_num_uuars / MLX5_BF_REGS_PER_PAGE;
++ num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
++ gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
+ resp.qp_tab_size = 1 << dev->mdev.caps.log_max_qp;
+ resp.bf_reg_size = dev->mdev.caps.bf_reg_size;
+ resp.cache_line_size = L1_CACHE_BYTES;
+@@ -585,7 +588,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
+ goto out_ctx;
+ }
+
+- uuari->bitmap = kcalloc(BITS_TO_LONGS(req.total_num_uuars),
++ uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
+ sizeof(*uuari->bitmap),
+ GFP_KERNEL);
+ if (!uuari->bitmap) {
+@@ -595,13 +598,13 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
+ /*
+ * clear all fast path uuars
+ */
+- for (i = 0; i < req.total_num_uuars; i++) {
++ for (i = 0; i < gross_uuars; i++) {
+ uuarn = i & 3;
+ if (uuarn == 2 || uuarn == 3)
+ set_bit(i, uuari->bitmap);
+ }
+
+- uuari->count = kcalloc(req.total_num_uuars, sizeof(*uuari->count), GFP_KERNEL);
++ uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
+ if (!uuari->count) {
+ err = -ENOMEM;
+ goto out_bitmap;
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -340,14 +340,57 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
+ return 1;
+ }
+
++static int first_med_uuar(void)
++{
++ return 1;
++}
++
++static int next_uuar(int n)
++{
++ n++;
++
++ while (((n % 4) & 2))
++ n++;
++
++ return n;
++}
++
++static int num_med_uuar(struct mlx5_uuar_info *uuari)
++{
++ int n;
++
++ n = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE -
++ uuari->num_low_latency_uuars - 1;
++
++ return n >= 0 ? n : 0;
++}
++
++static int max_uuari(struct mlx5_uuar_info *uuari)
++{
++ return uuari->num_uars * 4;
++}
++
++static int first_hi_uuar(struct mlx5_uuar_info *uuari)
++{
++ int med;
++ int i;
++ int t;
++
++ med = num_med_uuar(uuari);
++ for (t = 0, i = first_med_uuar();; i = next_uuar(i)) {
++ t++;
++ if (t == med)
++ return next_uuar(i);
++ }
++
++ return 0;
++}
++
+ static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
+ {
+- int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
+- int start_uuar;
+ int i;
+
+- start_uuar = nuuars - uuari->num_low_latency_uuars;
+- for (i = start_uuar; i < nuuars; i++) {
++ for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) {
+ if (!test_bit(i, uuari->bitmap)) {
+ set_bit(i, uuari->bitmap);
+ uuari->count[i]++;
+@@ -360,19 +403,10 @@ static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
+
+ static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
+ {
+- int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
+- int minidx = 1;
+- int uuarn;
+- int end;
++ int minidx = first_med_uuar();
+ int i;
+
+- end = nuuars - uuari->num_low_latency_uuars;
+-
+- for (i = 1; i < end; i++) {
+- uuarn = i & 3;
+- if (uuarn == 2 || uuarn == 3)
+- continue;
+-
++ for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) {
+ if (uuari->count[i] < uuari->count[minidx])
+ minidx = i;
+ }
+@@ -510,11 +544,16 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
+ if (uuarn < 0) {
+ mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
+- mlx5_ib_dbg(dev, "reverting to high latency\n");
+- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
++ mlx5_ib_dbg(dev, "reverting to medium latency\n");
++ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
+ if (uuarn < 0) {
+- mlx5_ib_dbg(dev, "uuar allocation failed\n");
+- return uuarn;
++ mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
++ mlx5_ib_dbg(dev, "reverting to high latency\n");
++ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
++ if (uuarn < 0) {
++ mlx5_ib_warn(dev, "uuar allocation failed\n");
++ return uuarn;
++ }
+ }
+ }
+
+diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/device.h
++++ b/include/linux/mlx5/device.h
+@@ -104,9 +104,10 @@ enum {
+ };
+
+ enum {
+- MLX5_BF_REGS_PER_PAGE = 4,
+- MLX5_MAX_UAR_PAGES = 1 << 8,
+- MLX5_MAX_UUARS = MLX5_MAX_UAR_PAGES * MLX5_BF_REGS_PER_PAGE,
++ MLX5_BF_REGS_PER_PAGE = 4,
++ MLX5_MAX_UAR_PAGES = 1 << 8,
++ MLX5_NON_FP_BF_REGS_PER_PAGE = 2,
++ MLX5_MAX_UUARS = MLX5_MAX_UAR_PAGES * MLX5_NON_FP_BF_REGS_PER_PAGE,
+ };
+
+ enum {
--- /dev/null
+From: Haggai Eran <haggaie@mellanox.com>
+Subject: [PATCH] mlx5_core: Fix out arg size in access_register command
+
+The output size should be the sum of the core access reg output struct
+plus the size of the specific register data provided by the caller.
+
+Signed-off-by: Haggai Eran <haggaie@mellanox.com>
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/port.c | 2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
+@@ -57,7 +57,7 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
+ in->arg = cpu_to_be32(arg);
+ in->register_id = cpu_to_be16(reg_num);
+ err = mlx5_cmd_exec(dev, in, sizeof(*in) + size_in, out,
+- sizeof(out) + size_out);
++ sizeof(*out) + size_out);
+ if (err)
+ goto ex2;
+
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Clear out struct before create QP command
+
+Output structs are expected by firmware to be cleared when a command is called.
+Clear the "out" struct instead of "dout" which is used only later.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/qp.c | 2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+@@ -74,7 +74,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
+ struct mlx5_destroy_qp_mbox_out dout;
+ int err;
+
+- memset(&dout, 0, sizeof(dout));
++ memset(&out, 0, sizeof(out));
+ in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP);
+
+ err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] mlx5_core: Use mlx5 core style warning
+
+Use mlx5_core_warn(), which is the standard warning emitter function, instead
+of pr_warn().
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/qp.c | 3 ++-
+ 1 files changed, 2 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+@@ -84,7 +84,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
+ }
+
+ if (out.hdr.status) {
+- pr_warn("current num of QPs 0x%x\n", atomic_read(&dev->num_qps));
++ mlx5_core_warn(dev, "current num of QPs 0x%x\n",
++ atomic_read(&dev->num_qps));
+ return mlx5_cmd_status_to_err(&out.hdr);
+ }
+
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Make sure doorbell record is visible before doorbell
+
+Put a wmb() to make sure the doorbell record is visible to the HCA before we
+hit the doorbell.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/qp.c | 4 ++++
+ 1 files changed, 4 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -2251,6 +2251,10 @@ out:
+
+ qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
+
++ /* Make sure doorbell record is visible to the HCA before
++ * we hit doorbell */
++ wmb();
++
+ if (bf->need_lock)
+ spin_lock(&bf->lock);
+
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Implement modify CQ
+
+Modify CQ is used by ULPs like IPoIB to change moderation parameters. This
+patch adds support in mlx5.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/cq.c | 26 +++++++++++++++++++++++++-
+ drivers/net/ethernet/mellanox/mlx5/core/cq.c | 17 +++++++++++++++--
+ include/linux/mlx5/cq.h | 8 ++++----
+ include/linux/mlx5/device.h | 15 +++++++++++++++
+ 4 files changed, 59 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/cq.c
++++ b/drivers/infiniband/hw/mlx5/cq.c
+@@ -818,7 +818,31 @@ void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
+
+ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+ {
+- return -ENOSYS;
++ struct mlx5_modify_cq_mbox_in *in;
++ struct mlx5_ib_dev *dev = to_mdev(cq->device);
++ struct mlx5_ib_cq *mcq = to_mcq(cq);
++ int err;
++ u32 fsel;
++
++ if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER))
++ return -ENOSYS;
++
++ in = kzalloc(sizeof(*in), GFP_KERNEL);
++ if (!in)
++ return -ENOMEM;
++
++ in->cqn = cpu_to_be32(mcq->mcq.cqn);
++ fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
++ in->ctx.cq_period = cpu_to_be16(cq_period);
++ in->ctx.cq_max_count = cpu_to_be16(cq_count);
++ in->field_select = cpu_to_be32(fsel);
++ err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in);
++ kfree(in);
++
++ if (err)
++ mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);
++
++ return err;
+ }
+
+ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+@@ -201,10 +201,23 @@ EXPORT_SYMBOL(mlx5_core_query_cq);
+
+
+ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+- int type, struct mlx5_cq_modify_params *params)
++ struct mlx5_modify_cq_mbox_in *in)
+ {
+- return -ENOSYS;
++ struct mlx5_modify_cq_mbox_out out;
++ int err;
++
++ memset(&out, 0, sizeof(out));
++ in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MODIFY_CQ);
++ err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
++ if (err)
++ return err;
++
++ if (out.hdr.status)
++ return mlx5_cmd_status_to_err(&out.hdr);
++
++ return 0;
+ }
++EXPORT_SYMBOL(mlx5_core_modify_cq);
+
+ int mlx5_init_cq_table(struct mlx5_core_dev *dev)
+ {
+diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/cq.h
++++ b/include/linux/mlx5/cq.h
+@@ -85,9 +85,9 @@ enum {
+ };
+
+ enum {
+- MLX5_CQ_MODIFY_RESEIZE = 0,
+- MLX5_CQ_MODIFY_MODER = 1,
+- MLX5_CQ_MODIFY_MAPPING = 2,
++ MLX5_CQ_MODIFY_PERIOD = 1 << 0,
++ MLX5_CQ_MODIFY_COUNT = 1 << 1,
++ MLX5_CQ_MODIFY_OVERRUN = 1 << 2,
+ };
+
+ struct mlx5_cq_modify_params {
+@@ -158,7 +158,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
+ int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+ struct mlx5_query_cq_mbox_out *out);
+ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+- int type, struct mlx5_cq_modify_params *params);
++ struct mlx5_modify_cq_mbox_in *in);
+ int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
+ void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
+
+diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/device.h
++++ b/include/linux/mlx5/device.h
+@@ -177,6 +177,7 @@ enum {
+ MLX5_DEV_CAP_FLAG_APM = 1LL << 17,
+ MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 18,
+ MLX5_DEV_CAP_FLAG_ON_DMND_PG = 1LL << 24,
++ MLX5_DEV_CAP_FLAG_CQ_MODER = 1LL << 29,
+ MLX5_DEV_CAP_FLAG_RESIZE_SRQ = 1LL << 32,
+ MLX5_DEV_CAP_FLAG_REMOTE_FENCE = 1LL << 38,
+ MLX5_DEV_CAP_FLAG_TLP_HINTS = 1LL << 39,
+@@ -698,6 +699,19 @@ struct mlx5_query_cq_mbox_out {
+ __be64 pas[0];
+ };
+
++struct mlx5_modify_cq_mbox_in {
++ struct mlx5_inbox_hdr hdr;
++ __be32 cqn;
++ __be32 field_select;
++ struct mlx5_cq_context ctx;
++ u8 rsvd[192];
++ __be64 pas[0];
++};
++
++struct mlx5_modify_cq_mbox_out {
++ struct mlx5_outbox_hdr hdr;
++};
++
+ struct mlx5_enable_hca_mbox_in {
+ struct mlx5_inbox_hdr hdr;
+ u8 rsvd[8];
+@@ -872,6 +886,7 @@ struct mlx5_modify_mkey_mbox_in {
+
+ struct mlx5_modify_mkey_mbox_out {
+ struct mlx5_outbox_hdr hdr;
++ u8 rsvd[8];
+ };
+
+ struct mlx5_dump_mkey_mbox_in {
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Add support for resize CQ
+
+Implement resize CQ which is a mandatory verb in mlx5.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/cq.c | 282 ++++++++++++++++++++++++--
+ drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +-
+ drivers/infiniband/hw/mlx5/user.h | 3 +
+ drivers/net/ethernet/mellanox/mlx5/core/cq.c | 4 +-
+ include/linux/mlx5/cq.h | 12 +-
+ include/linux/mlx5/device.h | 2 +
+ 6 files changed, 284 insertions(+), 22 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/cq.c
++++ b/drivers/infiniband/hw/mlx5/cq.c
+@@ -73,14 +73,24 @@ static void *get_cqe(struct mlx5_ib_cq *cq, int n)
+ return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
+ }
+
++static u8 sw_ownership_bit(int n, int nent)
++{
++ return (n & nent) ? 1 : 0;
++}
++
+ static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
+ {
+ void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
+ struct mlx5_cqe64 *cqe64;
+
+ cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
+- return ((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^
+- !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
++
++ if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
++ !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
++ return cqe;
++ } else {
++ return NULL;
++ }
+ }
+
+ static void *next_cqe_sw(struct mlx5_ib_cq *cq)
+@@ -351,6 +361,11 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
+ qp->sq.last_poll = tail;
+ }
+
++static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
++{
++ mlx5_buf_free(&dev->mdev, &buf->buf);
++}
++
+ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
+ struct mlx5_ib_qp **cur_qp,
+ struct ib_wc *wc)
+@@ -366,6 +381,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
+ void *cqe;
+ int idx;
+
++repoll:
+ cqe = next_cqe_sw(cq);
+ if (!cqe)
+ return -EAGAIN;
+@@ -379,7 +395,18 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
+ */
+ rmb();
+
+- /* TBD: resize CQ */
++ opcode = cqe64->op_own >> 4;
++ if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
++ if (likely(cq->resize_buf)) {
++ free_cq_buf(dev, &cq->buf);
++ cq->buf = *cq->resize_buf;
++ kfree(cq->resize_buf);
++ cq->resize_buf = NULL;
++ goto repoll;
++ } else {
++ mlx5_ib_warn(dev, "unexpected resize cqe\n");
++ }
++ }
+
+ qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
+ if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
+@@ -398,7 +425,6 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
+ }
+
+ wc->qp = &(*cur_qp)->ibqp;
+- opcode = cqe64->op_own >> 4;
+ switch (opcode) {
+ case MLX5_CQE_REQ:
+ wq = &(*cur_qp)->sq;
+@@ -503,15 +529,11 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
+ return err;
+
+ buf->cqe_size = cqe_size;
++ buf->nent = nent;
+
+ return 0;
+ }
+
+-static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
+-{
+- mlx5_buf_free(&dev->mdev, &buf->buf);
+-}
+-
+ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
+ struct ib_ucontext *context, struct mlx5_ib_cq *cq,
+ int entries, struct mlx5_create_cq_mbox_in **cqb,
+@@ -576,16 +598,16 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
+ ib_umem_release(cq->buf.umem);
+ }
+
+-static void init_cq_buf(struct mlx5_ib_cq *cq, int nent)
++static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
+ {
+ int i;
+ void *cqe;
+ struct mlx5_cqe64 *cqe64;
+
+- for (i = 0; i < nent; i++) {
+- cqe = get_cqe(cq, i);
+- cqe64 = (cq->buf.cqe_size == 64) ? cqe : cqe + 64;
+- cqe64->op_own = 0xf1;
++ for (i = 0; i < buf->nent; i++) {
++ cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
++ cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
++ cqe64->op_own = MLX5_CQE_INVALID << 4;
+ }
+ }
+
+@@ -610,7 +632,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+ if (err)
+ goto err_db;
+
+- init_cq_buf(cq, entries);
++ init_cq_buf(cq, &cq->buf);
+
+ *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
+ *cqb = mlx5_vzalloc(*inlen);
+@@ -836,7 +858,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+ in->ctx.cq_period = cpu_to_be16(cq_period);
+ in->ctx.cq_max_count = cpu_to_be16(cq_count);
+ in->field_select = cpu_to_be32(fsel);
+- err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in);
++ err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in, sizeof(*in));
+ kfree(in);
+
+ if (err)
+@@ -845,9 +867,235 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+ return err;
+ }
+
++static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
++ int entries, struct ib_udata *udata, int *npas,
++ int *page_shift, int *cqe_size)
++{
++ struct mlx5_ib_resize_cq ucmd;
++ struct ib_umem *umem;
++ int err;
++ int npages;
++ struct ib_ucontext *context = cq->buf.umem->context;
++
++ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
++ return -EFAULT;
++
++ umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size,
++ IB_ACCESS_LOCAL_WRITE, 1);
++ if (IS_ERR(umem)) {
++ err = PTR_ERR(umem);
++ return err;
++ }
++
++ mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift,
++ npas, NULL);
++
++ cq->resize_umem = umem;
++ *cqe_size = ucmd.cqe_size;
++
++ return 0;
++}
++
++static void un_resize_user(struct mlx5_ib_cq *cq)
++{
++ ib_umem_release(cq->resize_umem);
++}
++
++static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
++ int entries, int cqe_size)
++{
++ int err;
++
++ cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL);
++ if (!cq->resize_buf)
++ return -ENOMEM;
++
++ err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
++ if (err)
++ goto ex;
++
++ init_cq_buf(cq, cq->resize_buf);
++
++ return 0;
++
++ex:
++ kfree(cq->resize_buf);
++ return err;
++}
++
++static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
++{
++ free_cq_buf(dev, cq->resize_buf);
++ cq->resize_buf = NULL;
++}
++
++static int copy_resize_cqes(struct mlx5_ib_cq *cq)
++{
++ struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
++ struct mlx5_cqe64 *scqe64;
++ struct mlx5_cqe64 *dcqe64;
++ void *start_cqe;
++ void *scqe;
++ void *dcqe;
++ int ssize;
++ int dsize;
++ int i;
++ u8 sw_own;
++
++ ssize = cq->buf.cqe_size;
++ dsize = cq->resize_buf->cqe_size;
++ if (ssize != dsize) {
++ mlx5_ib_warn(dev, "resize from different cqe size is not supported\n");
++ return -EINVAL;
++ }
++
++ i = cq->mcq.cons_index;
++ scqe = get_sw_cqe(cq, i);
++ scqe64 = ssize == 64 ? scqe : scqe + 64;
++ start_cqe = scqe;
++ if (!scqe) {
++ mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
++ return -EINVAL;
++ }
++
++ while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
++ dcqe = get_cqe_from_buf(cq->resize_buf,
++ (i + 1) & (cq->resize_buf->nent),
++ dsize);
++ dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
++ sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
++ memcpy(dcqe, scqe, dsize);
++ dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;
++
++ ++i;
++ scqe = get_sw_cqe(cq, i);
++ scqe64 = ssize == 64 ? scqe : scqe + 64;
++ if (!scqe) {
++ mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
++ return -EINVAL;
++ }
++
++ if (scqe == start_cqe) {
++ pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n",
++ cq->mcq.cqn);
++ return -ENOMEM;
++ }
++ }
++ ++cq->mcq.cons_index;
++ return 0;
++}
++
+ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
+ {
+- return -ENOSYS;
++ struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
++ struct mlx5_ib_cq *cq = to_mcq(ibcq);
++ struct mlx5_modify_cq_mbox_in *in;
++ int err;
++ int npas;
++ int page_shift;
++ int inlen;
++ int uninitialized_var(cqe_size);
++ unsigned long flags;
++
++ if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) {
++ pr_info("Firmware does not support resize CQ\n");
++ return -ENOSYS;
++ }
++
++ if (entries < 1)
++ return -EINVAL;
++
++ entries = roundup_pow_of_two(entries + 1);
++ if (entries > dev->mdev.caps.max_cqes + 1)
++ return -EINVAL;
++
++ if (entries == ibcq->cqe + 1)
++ return 0;
++
++ mutex_lock(&cq->resize_mutex);
++ if (udata) {
++ err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
++ &cqe_size);
++ } else {
++ cqe_size = 64;
++ err = resize_kernel(dev, cq, entries, cqe_size);
++ if (!err) {
++ npas = cq->resize_buf->buf.npages;
++ page_shift = cq->resize_buf->buf.page_shift;
++ }
++ }
++
++ if (err)
++ goto ex;
++
++ inlen = sizeof(*in) + npas * sizeof(in->pas[0]);
++ in = mlx5_vzalloc(inlen);
++ if (!in) {
++ err = -ENOMEM;
++ goto ex_resize;
++ }
++
++ if (udata)
++ mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
++ in->pas, 0);
++ else
++ mlx5_fill_page_array(&cq->resize_buf->buf, in->pas);
++
++ in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE |
++ MLX5_MODIFY_CQ_MASK_PG_OFFSET |
++ MLX5_MODIFY_CQ_MASK_PG_SIZE);
++ in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
++ in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
++ in->ctx.page_offset = 0;
++ in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24);
++ in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE);
++ in->cqn = cpu_to_be32(cq->mcq.cqn);
++
++ err = mlx5_core_modify_cq(&dev->mdev, &cq->mcq, in, inlen);
++ if (err)
++ goto ex_alloc;
++
++ if (udata) {
++ cq->ibcq.cqe = entries - 1;
++ ib_umem_release(cq->buf.umem);
++ cq->buf.umem = cq->resize_umem;
++ cq->resize_umem = NULL;
++ } else {
++ struct mlx5_ib_cq_buf tbuf;
++ int resized = 0;
++
++ spin_lock_irqsave(&cq->lock, flags);
++ if (cq->resize_buf) {
++ err = copy_resize_cqes(cq);
++ if (!err) {
++ tbuf = cq->buf;
++ cq->buf = *cq->resize_buf;
++ kfree(cq->resize_buf);
++ cq->resize_buf = NULL;
++ resized = 1;
++ }
++ }
++ cq->ibcq.cqe = entries - 1;
++ spin_unlock_irqrestore(&cq->lock, flags);
++ if (resized)
++ free_cq_buf(dev, &tbuf);
++ }
++ mutex_unlock(&cq->resize_mutex);
++
++ mlx5_vfree(in);
++ return 0;
++
++ex_alloc:
++ mlx5_vfree(in);
++
++ex_resize:
++ if (udata)
++ un_resize_user(cq);
++ else
++ un_resize_kernel(dev, cq);
++ex:
++ mutex_unlock(&cq->resize_mutex);
++ return err;
+ }
+
+ int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
+diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+@@ -195,6 +195,7 @@ struct mlx5_ib_cq_buf {
+ struct mlx5_buf buf;
+ struct ib_umem *umem;
+ int cqe_size;
++ int nent;
+ };
+
+ enum mlx5_ib_qp_flags {
+@@ -220,7 +221,7 @@ struct mlx5_ib_cq {
+ /* protect resize cq
+ */
+ struct mutex resize_mutex;
+- struct mlx5_ib_cq_resize *resize_buf;
++ struct mlx5_ib_cq_buf *resize_buf;
+ struct ib_umem *resize_umem;
+ int cqe_size;
+ };
+diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/user.h
++++ b/drivers/infiniband/hw/mlx5/user.h
+@@ -93,6 +93,9 @@ struct mlx5_ib_create_cq_resp {
+
+ struct mlx5_ib_resize_cq {
+ __u64 buf_addr;
++ __u16 cqe_size;
++ __u16 reserved0;
++ __u32 reserved1;
+ };
+
+ struct mlx5_ib_create_srq {
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+@@ -201,14 +201,14 @@ EXPORT_SYMBOL(mlx5_core_query_cq);
+
+
+ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+- struct mlx5_modify_cq_mbox_in *in)
++ struct mlx5_modify_cq_mbox_in *in, int in_sz)
+ {
+ struct mlx5_modify_cq_mbox_out out;
+ int err;
+
+ memset(&out, 0, sizeof(out));
+ in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MODIFY_CQ);
+- err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
++ err = mlx5_cmd_exec(dev, in, in_sz, &out, sizeof(out));
+ if (err)
+ return err;
+
+diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/cq.h
++++ b/include/linux/mlx5/cq.h
+@@ -79,9 +79,10 @@ enum {
+ MLX5_CQE_RESP_SEND = 2,
+ MLX5_CQE_RESP_SEND_IMM = 3,
+ MLX5_CQE_RESP_SEND_INV = 4,
+- MLX5_CQE_RESIZE_CQ = 0xff, /* TBD */
++ MLX5_CQE_RESIZE_CQ = 5,
+ MLX5_CQE_REQ_ERR = 13,
+ MLX5_CQE_RESP_ERR = 14,
++ MLX5_CQE_INVALID = 15,
+ };
+
+ enum {
+@@ -90,6 +91,13 @@ enum {
+ MLX5_CQ_MODIFY_OVERRUN = 1 << 2,
+ };
+
++enum {
++ MLX5_CQ_OPMOD_RESIZE = 1,
++ MLX5_MODIFY_CQ_MASK_LOG_SIZE = 1 << 0,
++ MLX5_MODIFY_CQ_MASK_PG_OFFSET = 1 << 1,
++ MLX5_MODIFY_CQ_MASK_PG_SIZE = 1 << 2,
++};
++
+ struct mlx5_cq_modify_params {
+ int type;
+ union {
+@@ -158,7 +166,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
+ int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+ struct mlx5_query_cq_mbox_out *out);
+ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+- struct mlx5_modify_cq_mbox_in *in);
++ struct mlx5_modify_cq_mbox_in *in, int in_sz);
+ int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
+ void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
+
+diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/device.h
++++ b/include/linux/mlx5/device.h
+@@ -178,6 +178,7 @@ enum {
+ MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 18,
+ MLX5_DEV_CAP_FLAG_ON_DMND_PG = 1LL << 24,
+ MLX5_DEV_CAP_FLAG_CQ_MODER = 1LL << 29,
++ MLX5_DEV_CAP_FLAG_RESIZE_CQ = 1LL << 30,
+ MLX5_DEV_CAP_FLAG_RESIZE_SRQ = 1LL << 32,
+ MLX5_DEV_CAP_FLAG_REMOTE_FENCE = 1LL << 38,
+ MLX5_DEV_CAP_FLAG_TLP_HINTS = 1LL << 39,
+@@ -710,6 +711,7 @@ struct mlx5_modify_cq_mbox_in {
+
+ struct mlx5_modify_cq_mbox_out {
+ struct mlx5_outbox_hdr hdr;
++ u8 rsvd[8];
+ };
+
+ struct mlx5_enable_hca_mbox_in {
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] mlx5_core: Improve debugfs readability
+
+Use strings to display transport service or state of QPs. Use numeric
+value for MTU of a QP.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 39 +++++++++++++++---
+ include/linux/mlx5/qp.h | 45 +++++++++++++++++++++
+ 2 files changed, 78 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+@@ -275,7 +275,7 @@ void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev)
+ }
+
+ static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
+- int index)
++ int index, int *is_str)
+ {
+ struct mlx5_query_qp_mbox_out *out;
+ struct mlx5_qp_context *ctx;
+@@ -293,19 +293,40 @@ static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
+ goto out;
+ }
+
++ *is_str = 0;
+ ctx = &out->ctx;
+ switch (index) {
+ case QP_PID:
+ param = qp->pid;
+ break;
+ case QP_STATE:
+- param = be32_to_cpu(ctx->flags) >> 28;
++ param = (u64)mlx5_qp_state_str(be32_to_cpu(ctx->flags) >> 28);
++ *is_str = 1;
+ break;
+ case QP_XPORT:
+- param = (be32_to_cpu(ctx->flags) >> 16) & 0xff;
++ param = (u64)mlx5_qp_type_str((be32_to_cpu(ctx->flags) >> 16) & 0xff);
++ *is_str = 1;
+ break;
+ case QP_MTU:
+- param = ctx->mtu_msgmax >> 5;
++ switch (ctx->mtu_msgmax >> 5) {
++ case IB_MTU_256:
++ param = 256;
++ break;
++ case IB_MTU_512:
++ param = 512;
++ break;
++ case IB_MTU_1024:
++ param = 1024;
++ break;
++ case IB_MTU_2048:
++ param = 2048;
++ break;
++ case IB_MTU_4096:
++ param = 4096;
++ break;
++ default:
++ param = 0;
++ }
+ break;
+ case QP_N_RECV:
+ param = 1 << ((ctx->rq_size_stride >> 3) & 0xf);
+@@ -414,6 +435,7 @@ static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count,
+ struct mlx5_field_desc *desc;
+ struct mlx5_rsc_debug *d;
+ char tbuf[18];
++ int is_str = 0;
+ u64 field;
+ int ret;
+
+@@ -424,7 +446,7 @@ static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count,
+ d = (void *)(desc - desc->i) - sizeof(*d);
+ switch (d->type) {
+ case MLX5_DBG_RSC_QP:
+- field = qp_read_field(d->dev, d->object, desc->i);
++ field = qp_read_field(d->dev, d->object, desc->i, &is_str);
+ break;
+
+ case MLX5_DBG_RSC_EQ:
+@@ -440,7 +462,12 @@ static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count,
+ return -EINVAL;
+ }
+
+- ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field);
++
++ if (is_str)
++ ret = snprintf(tbuf, sizeof(tbuf), "%s\n", (const char *)field);
++ else
++ ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field);
++
+ if (ret > 0) {
+ if (copy_to_user(buf, tbuf, ret))
+ return -EFAULT;
+diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/qp.h
++++ b/include/linux/mlx5/qp.h
+@@ -464,4 +464,49 @@ void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev);
+ int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
+ void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
+
++static inline const char *mlx5_qp_type_str(int type)
++{
++ switch (type) {
++ case MLX5_QP_ST_RC: return "RC";
++ case MLX5_QP_ST_UC: return "C";
++ case MLX5_QP_ST_UD: return "UD";
++ case MLX5_QP_ST_XRC: return "XRC";
++ case MLX5_QP_ST_MLX: return "MLX";
++ case MLX5_QP_ST_QP0: return "QP0";
++ case MLX5_QP_ST_QP1: return "QP1";
++ case MLX5_QP_ST_RAW_ETHERTYPE: return "RAW_ETHERTYPE";
++ case MLX5_QP_ST_RAW_IPV6: return "RAW_IPV6";
++ case MLX5_QP_ST_SNIFFER: return "SNIFFER";
++ case MLX5_QP_ST_SYNC_UMR: return "SYNC_UMR";
++ case MLX5_QP_ST_PTP_1588: return "PTP_1588";
++ case MLX5_QP_ST_REG_UMR: return "REG_UMR";
++ default: return "Invalid transport type";
++ }
++}
++
++static inline const char *mlx5_qp_state_str(int state)
++{
++ switch (state) {
++ case MLX5_QP_STATE_RST:
++ return "RST";
++ case MLX5_QP_STATE_INIT:
++ return "INIT";
++ case MLX5_QP_STATE_RTR:
++ return "RTR";
++ case MLX5_QP_STATE_RTS:
++ return "RTS";
++ case MLX5_QP_STATE_SQER:
++ return "SQER";
++ case MLX5_QP_STATE_SQD:
++ return "SQD";
++ case MLX5_QP_STATE_ERR:
++ return "ERR";
++ case MLX5_QP_STATE_SQ_DRAINING:
++ return "SQ_DRAINING";
++ case MLX5_QP_STATE_SUSPENDED:
++ return "SUSPENDED";
++ default: return "Invalid QP state";
++ }
++}
++
+ #endif /* MLX5_QP_H */
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] mlx5_core: Fix PowerPC support
+
+1. Fix derivation of sub-page index from the dma address in free_4k.
+2. Fix the DMA address passed to dma_unmap_page by masking it properly.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ .../net/ethernet/mellanox/mlx5/core/pagealloc.c | 9 +++++----
+ include/linux/mlx5/device.h | 3 ++-
+ 2 files changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+@@ -99,7 +99,7 @@ enum {
+
+ enum {
+ MLX5_MAX_RECLAIM_TIME_MILI = 5000,
+- MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / 4096,
++ MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE,
+ };
+
+ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
+@@ -206,7 +206,7 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
+ if (!fp->free_count)
+ list_del(&fp->list);
+
+- *addr = fp->addr + n * 4096;
++ *addr = fp->addr + n * MLX5_ADAPTER_PAGE_SIZE;
+
+ return 0;
+ }
+@@ -222,14 +222,15 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr)
+ return;
+ }
+
+- n = (addr & ~PAGE_MASK) % 4096;
++ n = (addr & ~PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
+ fwp->free_count++;
+ set_bit(n, &fwp->bitmask);
+ if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
+ rb_erase(&fwp->rb_node, &dev->priv.page_root);
+ if (fwp->free_count != 1)
+ list_del(&fwp->list);
+- dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
++ dma_unmap_page(&dev->pdev->dev, addr & PAGE_MASK, PAGE_SIZE,
++ DMA_BIDIRECTIONAL);
+ __free_page(fwp->page);
+ kfree(fwp);
+ } else if (fwp->free_count == 1) {
+diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/device.h
++++ b/include/linux/mlx5/device.h
+@@ -234,7 +234,8 @@ enum {
+ };
+
+ enum {
+- MLX5_ADAPTER_PAGE_SHIFT = 12
++ MLX5_ADAPTER_PAGE_SHIFT = 12,
++ MLX5_ADAPTER_PAGE_SIZE = 1 << MLX5_ADAPTER_PAGE_SHIFT,
+ };
+
+ enum {
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Allow creation of QPs with zero-length work queues
+
+The current code attempts to call ib_umem_get() even if the length is
+zero, which causes a failure. Since the spec allows zero length work
+queues, change the code so we don't call ib_umem_get() in those cases.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/qp.c | 49 +++++++++++++++++++++++----------------
+ 1 files changed, 29 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -523,12 +523,12 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ {
+ struct mlx5_ib_ucontext *context;
+ struct mlx5_ib_create_qp ucmd;
+- int page_shift;
++ int page_shift = 0;
+ int uar_index;
+ int npages;
+- u32 offset;
++ u32 offset = 0;
+ int uuarn;
+- int ncont;
++ int ncont = 0;
+ int err;
+
+ err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
+@@ -564,23 +564,29 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ if (err)
+ goto err_uuar;
+
+- qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
+- qp->buf_size, 0, 0);
+- if (IS_ERR(qp->umem)) {
+- mlx5_ib_dbg(dev, "umem_get failed\n");
+- err = PTR_ERR(qp->umem);
+- goto err_uuar;
++ if (ucmd.buf_addr && qp->buf_size) {
++ qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
++ qp->buf_size, 0, 0);
++ if (IS_ERR(qp->umem)) {
++ mlx5_ib_dbg(dev, "umem_get failed\n");
++ err = PTR_ERR(qp->umem);
++ goto err_uuar;
++ }
++ } else {
++ qp->umem = NULL;
+ }
+
+- mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
+- &ncont, NULL);
+- err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
+- if (err) {
+- mlx5_ib_warn(dev, "bad offset\n");
+- goto err_umem;
++ if (qp->umem) {
++ mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
++ &ncont, NULL);
++ err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
++ if (err) {
++ mlx5_ib_warn(dev, "bad offset\n");
++ goto err_umem;
++ }
++ mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
++ ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
+ }
+- mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
+- ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
+
+ *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
+ *in = mlx5_vzalloc(*inlen);
+@@ -588,7 +594,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ err = -ENOMEM;
+ goto err_umem;
+ }
+- mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
++ if (qp->umem)
++ mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
+ (*in)->ctx.log_pg_sz_remote_qpn =
+ cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
+ (*in)->ctx.params2 = cpu_to_be32(offset << 6);
+@@ -619,7 +626,8 @@ err_free:
+ mlx5_vfree(*in);
+
+ err_umem:
+- ib_umem_release(qp->umem);
++ if (qp->umem)
++ ib_umem_release(qp->umem);
+
+ err_uuar:
+ free_uuar(&context->uuari, uuarn);
+@@ -632,7 +640,8 @@ static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
+
+ context = to_mucontext(pd->uobject->context);
+ mlx5_ib_db_unmap_user(context, &qp->db);
+- ib_umem_release(qp->umem);
++ if (qp->umem)
++ ib_umem_release(qp->umem);
+ free_uuar(&context->uuari, qp->uuarn);
+ }
+
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Abort driver cleanup if teardown hca fails
+
+Do not continue with cleanup flow. If this ever happens we can check which
+resources remained open.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c | 10 ++++++++--
+ 1 files changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -460,7 +460,10 @@ disable_msix:
+
+ err_stop_poll:
+ mlx5_stop_health_poll(dev);
+- mlx5_cmd_teardown_hca(dev);
++ if (mlx5_cmd_teardown_hca(dev)) {
++ dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
++ return err;
++ }
+
+ err_pagealloc_stop:
+ mlx5_pagealloc_stop(dev);
+@@ -503,7 +506,10 @@ void mlx5_dev_cleanup(struct mlx5_core_dev *dev)
+ mlx5_eq_cleanup(dev);
+ mlx5_disable_msix(dev);
+ mlx5_stop_health_poll(dev);
+- mlx5_cmd_teardown_hca(dev);
++ if (mlx5_cmd_teardown_hca(dev)) {
++ dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
++ return;
++ }
+ mlx5_pagealloc_stop(dev);
+ mlx5_reclaim_startup_pages(dev);
+ mlx5_core_disable_hca(dev);
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Remove old field for create mkey mailbox
+
+Match firmware specification.
+
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ include/linux/mlx5/device.h | 4 ++--
+ 1 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/include/linux/mlx5/device.h
++++ b/include/linux/mlx5/device.h
+@@ -849,8 +849,8 @@ struct mlx5_create_mkey_mbox_in {
+ struct mlx5_mkey_seg seg;
+ u8 rsvd1[16];
+ __be32 xlat_oct_act_size;
+- __be32 bsf_coto_act_size;
+- u8 rsvd2[168];
++ __be32 rsvd2;
++ u8 rsvd3[168];
+ __be64 pas[0];
+ };
+
--- /dev/null
+From: Eli Cohen <eli@dev.mellanox.co.il>
+Subject: [PATCH] IB/mlx5: Verify reserved fields are cleared
+
+Verify that reserved fields in struct mlx5_ib_resize_cq are cleared
+before continuing execution of the verb. This is required to allow
+making use of this area in future revisions.
+
+Signed-off-by: Yann Droneaud <ydroneaud@opteya.com>
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/mlx5/cq.c | 8 ++++++--
+ 1 files changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
+index xxxxxxx..xxxxxxx xxxxxx
+--- a/drivers/infiniband/hw/mlx5/cq.c
++++ b/drivers/infiniband/hw/mlx5/cq.c
+@@ -877,8 +877,12 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+ int npages;
+ struct ib_ucontext *context = cq->buf.umem->context;
+
+- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+- return -EFAULT;
++ err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
++ if (err)
++ return err;
++
++ if (ucmd.reserved0 || ucmd.reserved1)
++ return -EINVAL;
+
+ umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size,
+ IB_ACCESS_LOCAL_WRITE, 1);