diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-11-18 15:36:04 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-11-18 15:36:04 -0800 |
commit | 1ea406c0e08c717241275064046d29b5bac1b1db (patch) | |
tree | 6cc1f4badbc565f9e2d45651413c04bfacf4e366 /drivers/net | |
parent | a709bd585f291b9a6d0a3691ab4a0b9d3300a7a1 (diff) | |
parent | b4fdf52b3fc8dd3ce13ece334f5fdff869705429 (diff) |
Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
Pull infiniband/rdma updates from Roland Dreier:
- Re-enable flow steering verbs with new improved userspace ABI
- Fixes for slow connection due to GID lookup scalability
- IPoIB fixes
- Many fixes to HW drivers including mlx4, mlx5, ocrdma and qib
- Further improvements to SRP error handling
- Add new transport type for Cisco usNIC
* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (66 commits)
IB/core: Re-enable create_flow/destroy_flow uverbs
IB/core: extended command: an improved infrastructure for uverbs commands
IB/core: Remove ib_uverbs_flow_spec structure from userspace
IB/core: Use a common header for uverbs flow_specs
IB/core: Make uverbs flow structure use names like verbs ones
IB/core: Rename 'flow' structs to match other uverbs structs
IB/core: clarify overflow/underflow checks on ib_create/destroy_flow
IB/ucma: Convert use of typedef ctl_table to struct ctl_table
IB/cm: Convert to using idr_alloc_cyclic()
IB/mlx5: Fix page shift in create CQ for userspace
IB/mlx4: Fix device max capabilities check
IB/mlx5: Fix list_del of empty list
IB/mlx5: Remove dead code
IB/core: Encorce MR access rights rules on kernel consumers
IB/mlx4: Fix endless loop in resize CQ
RDMA/cma: Remove unused argument and minor dead code
RDMA/ucma: Discard events for IDs not yet claimed by user space
IB/core: Add Cisco usNIC rdma node and transport types
RDMA/nes: Remove self-assignment from nes_query_qp()
IB/srp: Report receive errors correctly
...
Diffstat (limited to 'drivers/net')
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 106 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 8 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/main.c | 35 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/mr.c | 32 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 196 |
6 files changed, 275 insertions, 104 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 6ca30739625f..8675d26a678b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -98,6 +98,7 @@ enum { static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in, struct mlx5_cmd_msg *out, + void *uout, int uout_size, mlx5_cmd_cbk_t cbk, void *context, int page_queue) { @@ -110,6 +111,8 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd, ent->in = in; ent->out = out; + ent->uout = uout; + ent->uout_size = uout_size; ent->callback = cbk; ent->context = context; ent->cmd = cmd; @@ -534,6 +537,7 @@ static void cmd_work_handler(struct work_struct *work) ent->lay = lay; memset(lay, 0, sizeof(*lay)); memcpy(lay->in, ent->in->first.data, sizeof(lay->in)); + ent->op = be32_to_cpu(lay->in[0]) >> 16; if (ent->in->next) lay->in_ptr = cpu_to_be64(ent->in->next->dma); lay->inlen = cpu_to_be32(ent->in->len); @@ -628,7 +632,8 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) * 2. page queue commands do not support asynchrous completion */ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, - struct mlx5_cmd_msg *out, mlx5_cmd_cbk_t callback, + struct mlx5_cmd_msg *out, void *uout, int uout_size, + mlx5_cmd_cbk_t callback, void *context, int page_queue, u8 *status) { struct mlx5_cmd *cmd = &dev->cmd; @@ -642,7 +647,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, if (callback && page_queue) return -EINVAL; - ent = alloc_cmd(cmd, in, out, callback, context, page_queue); + ent = alloc_cmd(cmd, in, out, uout, uout_size, callback, context, + page_queue); if (IS_ERR(ent)) return PTR_ERR(ent); @@ -670,10 +676,10 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode); if (op < ARRAY_SIZE(cmd->stats)) { stats = &cmd->stats[op]; - spin_lock(&stats->lock); + spin_lock_irq(&stats->lock); stats->sum += ds; ++stats->n; - spin_unlock(&stats->lock); + spin_unlock_irq(&stats->lock); } mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, "fw exec time for %s is %lld nsec\n", @@ -826,7 +832,7 @@ static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev, int n; int i; - msg = kzalloc(sizeof(*msg), GFP_KERNEL); + msg = kzalloc(sizeof(*msg), flags); if (!msg) return ERR_PTR(-ENOMEM); @@ -1109,6 +1115,19 @@ void mlx5_cmd_use_polling(struct mlx5_core_dev *dev) up(&cmd->sem); } +static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) +{ + unsigned long flags; + + if (msg->cache) { + spin_lock_irqsave(&msg->cache->lock, flags); + list_add_tail(&msg->list, &msg->cache->head); + spin_unlock_irqrestore(&msg->cache->lock, flags); + } else { + mlx5_free_cmd_msg(dev, msg); + } +} + void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector) { struct mlx5_cmd *cmd = &dev->cmd; @@ -1117,6 +1136,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector) void *context; int err; int i; + ktime_t t1, t2, delta; + s64 ds; + struct mlx5_cmd_stats *stats; + unsigned long flags; for (i = 0; i < (1 << cmd->log_sz); i++) { if (test_bit(i, &vector)) { @@ -1141,9 +1164,29 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector) } free_ent(cmd, ent->idx); if (ent->callback) { + t1 = timespec_to_ktime(ent->ts1); + t2 = timespec_to_ktime(ent->ts2); + delta = ktime_sub(t2, t1); + ds = ktime_to_ns(delta); + if (ent->op < ARRAY_SIZE(cmd->stats)) { + stats = &cmd->stats[ent->op]; + spin_lock_irqsave(&stats->lock, flags); + stats->sum += ds; + ++stats->n; + spin_unlock_irqrestore(&stats->lock, flags); + } + callback = ent->callback; context = ent->context; err = ent->ret; + if (!err) + err = mlx5_copy_from_msg(ent->uout, + ent->out, + ent->uout_size); + + mlx5_free_cmd_msg(dev, ent->out); + free_msg(dev, ent->in); + free_cmd(ent); callback(err, context); } else { @@ -1160,7 +1203,8 @@ static int status_to_err(u8 status) return status ? -1 : 0; /* TBD more meaningful codes */ } -static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size) +static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size, + gfp_t gfp) { struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM); struct mlx5_cmd *cmd = &dev->cmd; @@ -1172,7 +1216,7 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size) ent = &cmd->cache.med; if (ent) { - spin_lock(&ent->lock); + spin_lock_irq(&ent->lock); if (!list_empty(&ent->head)) { msg = list_entry(ent->head.next, typeof(*msg), list); /* For cached lists, we must explicitly state what is @@ -1181,43 +1225,34 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size) msg->len = in_size; list_del(&msg->list); } - spin_unlock(&ent->lock); + spin_unlock_irq(&ent->lock); } if (IS_ERR(msg)) - msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, in_size); + msg = mlx5_alloc_cmd_msg(dev, gfp, in_size); return msg; } -static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) -{ - if (msg->cache) { - spin_lock(&msg->cache->lock); - list_add_tail(&msg->list, &msg->cache->head); - spin_unlock(&msg->cache->lock); - } else { - mlx5_free_cmd_msg(dev, msg); - } -} - static int is_manage_pages(struct mlx5_inbox_hdr *in) { return be16_to_cpu(in->opcode) == MLX5_CMD_OP_MANAGE_PAGES; } -int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, - int out_size) +static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, + int out_size, mlx5_cmd_cbk_t callback, void *context) { struct mlx5_cmd_msg *inb; struct mlx5_cmd_msg *outb; int pages_queue; + gfp_t gfp; int err; u8 status = 0; pages_queue = is_manage_pages(in); + gfp = callback ? GFP_ATOMIC : GFP_KERNEL; - inb = alloc_msg(dev, in_size); + inb = alloc_msg(dev, in_size, gfp); if (IS_ERR(inb)) { err = PTR_ERR(inb); return err; @@ -1229,13 +1264,14 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, goto out_in; } - outb = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, out_size); + outb = mlx5_alloc_cmd_msg(dev, gfp, out_size); if (IS_ERR(outb)) { err = PTR_ERR(outb); goto out_in; } - err = mlx5_cmd_invoke(dev, inb, outb, NULL, NULL, pages_queue, &status); + err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context, + pages_queue, &status); if (err) goto out_out; @@ -1248,14 +1284,30 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, err = mlx5_copy_from_msg(out, outb, out_size); out_out: - mlx5_free_cmd_msg(dev, outb); + if (!callback) + mlx5_free_cmd_msg(dev, outb); out_in: - free_msg(dev, inb); + if (!callback) + free_msg(dev, inb); return err; } + +int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, + int out_size) +{ + return cmd_exec(dev, in, in_size, out, out_size, NULL, NULL); +} EXPORT_SYMBOL(mlx5_cmd_exec); +int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, + void *out, int out_size, mlx5_cmd_cbk_t callback, + void *context) +{ + return cmd_exec(dev, in, in_size, out, out_size, callback, context); +} +EXPORT_SYMBOL(mlx5_cmd_exec_cb); + static void destroy_msg_cache(struct mlx5_core_dev *dev) { struct mlx5_cmd *cmd = &dev->cmd; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index 9c7194b26ee2..80f6d127257a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -154,10 +154,10 @@ static ssize_t average_read(struct file *filp, char __user *buf, size_t count, return 0; stats = filp->private_data; - spin_lock(&stats->lock); + spin_lock_irq(&stats->lock); if (stats->n) field = div64_u64(stats->sum, stats->n); - spin_unlock(&stats->lock); + spin_unlock_irq(&stats->lock); ret = snprintf(tbuf, sizeof(tbuf), "%llu\n", field); if (ret > 0) { if (copy_to_user(buf, tbuf, ret)) @@ -175,10 +175,10 @@ static ssize_t average_write(struct file *filp, const char __user *buf, struct mlx5_cmd_stats *stats; stats = filp->private_data; - spin_lock(&stats->lock); + spin_lock_irq(&stats->lock); stats->sum = 0; stats->n = 0; - spin_unlock(&stats->lock); + spin_unlock_irq(&stats->lock); *pos += count; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 2231d93cc7ad..64a61b286b2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -354,7 +354,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ); in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index); in->ctx.intr = vecidx; - in->ctx.log_page_size = PAGE_SHIFT - 12; + in->ctx.log_page_size = eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT; in->events_mask = cpu_to_be64(mask); err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index bc0f5fb66e24..40a9f5ed814d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -159,6 +159,36 @@ struct mlx5_reg_host_endianess { u8 rsvd[15]; }; + +#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos)) + +enum { + MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) | + CAP_MASK(MLX5_CAP_OFF_DCT, 1), +}; + +/* selectively copy writable fields clearing any reserved area + */ +static void copy_rw_fields(struct mlx5_hca_cap *to, struct mlx5_hca_cap *from) +{ + u64 v64; + + to->log_max_qp = from->log_max_qp & 0x1f; + to->log_max_ra_req_dc = from->log_max_ra_req_dc & 0x3f; + to->log_max_ra_res_dc = from->log_max_ra_res_dc & 0x3f; + to->log_max_ra_req_qp = from->log_max_ra_req_qp & 0x3f; + to->log_max_ra_res_qp = from->log_max_ra_res_qp & 0x3f; + to->log_max_atomic_size_qp = from->log_max_atomic_size_qp; + to->log_max_atomic_size_dc = from->log_max_atomic_size_dc; + v64 = be64_to_cpu(from->flags) & MLX5_CAP_BITS_RW_MASK; + to->flags = cpu_to_be64(v64); +} + +enum { + HCA_CAP_OPMOD_GET_MAX = 0, + HCA_CAP_OPMOD_GET_CUR = 1, +}; + static int handle_hca_cap(struct mlx5_core_dev *dev) { struct mlx5_cmd_query_hca_cap_mbox_out *query_out = NULL; @@ -180,7 +210,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) } query_ctx.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP); - query_ctx.hdr.opmod = cpu_to_be16(0x1); + query_ctx.hdr.opmod = cpu_to_be16(HCA_CAP_OPMOD_GET_CUR); err = mlx5_cmd_exec(dev, &query_ctx, sizeof(query_ctx), query_out, sizeof(*query_out)); if (err) @@ -192,8 +222,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) goto query_ex; } - memcpy(&set_ctx->hca_cap, &query_out->hca_cap, - sizeof(set_ctx->hca_cap)); + copy_rw_fields(&set_ctx->hca_cap, &query_out->hca_cap); if (dev->profile->mask & MLX5_PROF_MASK_QP_SIZE) set_ctx->hca_cap.log_max_qp = dev->profile->log_max_qp; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 5b44e2e46daf..35e514dc7b7d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -37,31 +37,41 @@ #include "mlx5_core.h" int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, - struct mlx5_create_mkey_mbox_in *in, int inlen) + struct mlx5_create_mkey_mbox_in *in, int inlen, + mlx5_cmd_cbk_t callback, void *context, + struct mlx5_create_mkey_mbox_out *out) { - struct mlx5_create_mkey_mbox_out out; + struct mlx5_create_mkey_mbox_out lout; int err; u8 key; - memset(&out, 0, sizeof(out)); - spin_lock(&dev->priv.mkey_lock); + memset(&lout, 0, sizeof(lout)); + spin_lock_irq(&dev->priv.mkey_lock); key = dev->priv.mkey_key++; - spin_unlock(&dev->priv.mkey_lock); + spin_unlock_irq(&dev->priv.mkey_lock); in->seg.qpn_mkey7_0 |= cpu_to_be32(key); in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_MKEY); - err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + if (callback) { + err = mlx5_cmd_exec_cb(dev, in, inlen, out, sizeof(*out), + callback, context); + return err; + } else { + err = mlx5_cmd_exec(dev, in, inlen, &lout, sizeof(lout)); + } + if (err) { mlx5_core_dbg(dev, "cmd exec faile %d\n", err); return err; } - if (out.hdr.status) { - mlx5_core_dbg(dev, "status %d\n", out.hdr.status); - return mlx5_cmd_status_to_err(&out.hdr); + if (lout.hdr.status) { + mlx5_core_dbg(dev, "status %d\n", lout.hdr.status); + return mlx5_cmd_status_to_err(&lout.hdr); } - mr->key = mlx5_idx_to_mkey(be32_to_cpu(out.mkey) & 0xffffff) | key; - mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", be32_to_cpu(out.mkey), key, mr->key); + mr->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key; + mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", + be32_to_cpu(lout.mkey), key, mr->key); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 7b12acf210f8..37b6ad1f9a1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -57,10 +57,13 @@ struct mlx5_pages_req { }; struct fw_page { - struct rb_node rb_node; - u64 addr; - struct page *page; - u16 func_id; + struct rb_node rb_node; + u64 addr; + struct page *page; + u16 func_id; + unsigned long bitmask; + struct list_head list; + unsigned free_count; }; struct mlx5_query_pages_inbox { @@ -94,6 +97,11 @@ enum { MAX_RECLAIM_TIME_MSECS = 5000, }; +enum { + MLX5_MAX_RECLAIM_TIME_MILI = 5000, + MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / 4096, +}; + static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id) { struct rb_root *root = &dev->priv.page_root; @@ -101,6 +109,7 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u struct rb_node *parent = NULL; struct fw_page *nfp; struct fw_page *tfp; + int i; while (*new) { parent = *new; @@ -113,25 +122,29 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u return -EEXIST; } - nfp = kmalloc(sizeof(*nfp), GFP_KERNEL); + nfp = kzalloc(sizeof(*nfp), GFP_KERNEL); if (!nfp) return -ENOMEM; nfp->addr = addr; nfp->page = page; nfp->func_id = func_id; + nfp->free_count = MLX5_NUM_4K_IN_PAGE; + for (i = 0; i < MLX5_NUM_4K_IN_PAGE; i++) + set_bit(i, &nfp->bitmask); rb_link_node(&nfp->rb_node, parent, new); rb_insert_color(&nfp->rb_node, root); + list_add(&nfp->list, &dev->priv.free_list); return 0; } -static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr) +static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr) { struct rb_root *root = &dev->priv.page_root; struct rb_node *tmp = root->rb_node; - struct page *result = NULL; + struct fw_page *result = NULL; struct fw_page *tfp; while (tmp) { @@ -141,9 +154,7 @@ static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr) } else if (tfp->addr > addr) { tmp = tmp->rb_right; } else { - rb_erase(&tfp->rb_node, root); - result = tfp->page; - kfree(tfp); + result = tfp; break; } } @@ -176,12 +187,98 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, return err; } +static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr) +{ + struct fw_page *fp; + unsigned n; + + if (list_empty(&dev->priv.free_list)) { + return -ENOMEM; + mlx5_core_warn(dev, "\n"); + } + + fp = list_entry(dev->priv.free_list.next, struct fw_page, list); + n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask)); + if (n >= MLX5_NUM_4K_IN_PAGE) { + mlx5_core_warn(dev, "alloc 4k bug\n"); + return -ENOENT; + } + clear_bit(n, &fp->bitmask); + fp->free_count--; + if (!fp->free_count) + list_del(&fp->list); + + *addr = fp->addr + n * 4096; + + return 0; +} + +static void free_4k(struct mlx5_core_dev *dev, u64 addr) +{ + struct fw_page *fwp; + int n; + + fwp = find_fw_page(dev, addr & PAGE_MASK); + if (!fwp) { + mlx5_core_warn(dev, "page not found\n"); + return; + } + + n = (addr & ~PAGE_MASK) % 4096; + fwp->free_count++; + set_bit(n, &fwp->bitmask); + if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) { + rb_erase(&fwp->rb_node, &dev->priv.page_root); + if (fwp->free_count != 1) + list_del(&fwp->list); + dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + __free_page(fwp->page); + kfree(fwp); + } else if (fwp->free_count == 1) { + list_add(&fwp->list, &dev->priv.free_list); + } +} + +static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id) +{ + struct page *page; + u64 addr; + int err; + + page = alloc_page(GFP_HIGHUSER); + if (!page) { + mlx5_core_warn(dev, "failed to allocate page\n"); + return -ENOMEM; + } + addr = dma_map_page(&dev->pdev->dev, page, 0, + PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(&dev->pdev->dev, addr)) { + mlx5_core_warn(dev, "failed dma mapping page\n"); + err = -ENOMEM; + goto out_alloc; + } + err = insert_page(dev, addr, page, func_id); + if (err) { + mlx5_core_err(dev, "failed to track allocated page\n"); + goto out_mapping; + } + + return 0; + +out_mapping: + dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + +out_alloc: + __free_page(page); + + return err; +} static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, int notify_fail) { struct mlx5_manage_pages_inbox *in; struct mlx5_manage_pages_outbox out; - struct page *page; + struct mlx5_manage_pages_inbox *nin; int inlen; u64 addr; int err; @@ -196,27 +293,15 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, memset(&out, 0, sizeof(out)); for (i = 0; i < npages; i++) { - page = alloc_page(GFP_HIGHUSER); - if (!page) { - err = -ENOMEM; - mlx5_core_warn(dev, "failed to allocate page\n"); - goto out_alloc; - } - addr = dma_map_page(&dev->pdev->dev, page, 0, - PAGE_SIZE, DMA_BIDIRECTIONAL); - if (dma_mapping_error(&dev->pdev->dev, addr)) { - mlx5_core_warn(dev, "failed dma mapping page\n"); - __free_page(page); - err = -ENOMEM; - goto out_alloc; - } - err = insert_page(dev, addr, page, func_id); +retry: + err = alloc_4k(dev, &addr); if (err) { - mlx5_core_err(dev, "failed to track allocated page\n"); - dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); - __free_page(page); - err = -ENOMEM; - goto out_alloc; + if (err == -ENOMEM) + err = alloc_system_page(dev, func_id); + if (err) + goto out_4k; + + goto retry; } in->pas[i] = cpu_to_be64(addr); } @@ -226,7 +311,6 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, in->func_id = cpu_to_be16(func_id); in->num_entries = cpu_to_be32(npages); err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); - mlx5_core_dbg(dev, "err %d\n", err); if (err) { mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err); goto out_alloc; @@ -247,25 +331,22 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, out_alloc: if (notify_fail) { - memset(in, 0, inlen); - memset(&out, 0, sizeof(out)); - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES); - in->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE); - if (mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out))) - mlx5_core_warn(dev, "\n"); - } - for (i--; i >= 0; i--) { - addr = be64_to_cpu(in->pas[i]); - page = remove_page(dev, addr); - if (!page) { - mlx5_core_err(dev, "BUG: can't remove page at addr 0x%llx\n", - addr); - continue; + nin = kzalloc(sizeof(*nin), GFP_KERNEL); + if (!nin) { + mlx5_core_warn(dev, "allocation failed\n"); + goto out_4k; } - dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); - __free_page(page); + memset(&out, 0, sizeof(out)); + nin->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES); + nin->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE); + if (mlx5_cmd_exec(dev, nin, sizeof(*nin), &out, sizeof(out))) + mlx5_core_warn(dev, "page notify failed\n"); + kfree(nin); } +out_4k: + for (i--; i >= 0; i--) + free_4k(dev, be64_to_cpu(in->pas[i])); out_free: mlx5_vfree(in); return err; @@ -276,7 +357,6 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, { struct mlx5_manage_pages_inbox in; struct mlx5_manage_pages_outbox *out; - struct page *page; int num_claimed; int outlen; u64 addr; @@ -315,13 +395,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, for (i = 0; i < num_claimed; i++) { addr = be64_to_cpu(out->pas[i]); - page = remove_page(dev, addr); - if (!page) { - mlx5_core_warn(dev, "FW reported unknown DMA address 0x%llx\n", addr); - } else { - dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); - __free_page(page); - } + free_4k(dev, addr); } out_free: @@ -381,14 +455,19 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) return give_pages(dev, func_id, npages, 0); } +enum { + MLX5_BLKS_FOR_RECLAIM_PAGES = 12 +}; + static int optimal_reclaimed_pages(void) { struct mlx5_cmd_prot_block *block; struct mlx5_cmd_layout *lay; int ret; - ret = (sizeof(lay->in) + sizeof(block->data) - - sizeof(struct mlx5_manage_pages_outbox)) / 8; + ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) - + sizeof(struct mlx5_manage_pages_outbox)) / + FIELD_SIZEOF(struct mlx5_manage_pages_outbox, pas[0]); return ret; } @@ -427,6 +506,7 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) void mlx5_pagealloc_init(struct mlx5_core_dev *dev) { dev->priv.page_root = RB_ROOT; + INIT_LIST_HEAD(&dev->priv.free_list); } void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev) |