summaryrefslogtreecommitdiff
path: root/drivers/infiniband/core
diff options
context:
space:
mode:
authorJason Gunthorpe <jgg@nvidia.com>2020-11-30 09:58:37 +0200
committerJason Gunthorpe <jgg@nvidia.com>2020-12-07 14:06:23 -0400
commit6e0954b11c056570cb29676a84e2f8dc4d1dd05e (patch)
tree0c72abb6ece5e7f6fb2bde31a5d0596020e22840 /drivers/infiniband/core
parentadac4cb3c1ff5c47c9f47be5d017a0e054176e3c (diff)
RDMA/uverbs: Allow drivers to create a new HW object during rereg_mr
mlx5 has an ugly flow where it tries to allocate a new MR and replace the existing MR in the same memory during rereg. This is very complicated and buggy. Instead of trying to replace in-place inside the driver, provide support from uverbs to change the entire HW object assigned to a handle during rereg_mr. Since destroying a MR is allowed to fail (ie if a MW is pointing at it) and can't be detected in advance, the algorithm creates a completely new uobject to hold the new MR and swaps the IDR entries of the two objects. The old MR in the temporary IDR entry is destroyed, and if it fails rereg_mr succeeds and destruction is deferred to FD release. This complexity is why this cannot live in a driver safely. Link: https://lore.kernel.org/r/20201130075839.278575-4-leon@kernel.org Signed-off-by: Leon Romanovsky <leonro@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--drivers/infiniband/core/rdma_core.c51
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c85
2 files changed, 116 insertions, 20 deletions
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index c44079b9158e..75eafd9208aa 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -595,6 +595,27 @@ static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
WARN_ON(old != NULL);
}
+static void swap_idr_uobjects(struct ib_uobject *obj_old,
+ struct ib_uobject *obj_new)
+{
+ struct ib_uverbs_file *ufile = obj_old->ufile;
+ void *old;
+
+ /*
+ * New must be an object that been allocated but not yet committed, this
+ * moves the pre-committed state to obj_old, new still must be comitted.
+ */
+ old = xa_cmpxchg(&ufile->idr, obj_old->id, obj_old, XA_ZERO_ENTRY,
+ GFP_KERNEL);
+ if (WARN_ON(old != obj_old))
+ return;
+
+ swap(obj_old->id, obj_new->id);
+
+ old = xa_cmpxchg(&ufile->idr, obj_old->id, NULL, obj_old, GFP_KERNEL);
+ WARN_ON(old != NULL);
+}
+
static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
{
int fd = uobj->id;
@@ -641,6 +662,35 @@ void rdma_alloc_commit_uobject(struct ib_uobject *uobj,
}
/*
+ * new_uobj will be assigned to the handle currently used by to_uobj, and
+ * to_uobj will be destroyed.
+ *
+ * Upon return the caller must do:
+ * rdma_alloc_commit_uobject(new_uobj)
+ * uobj_put_destroy(to_uobj)
+ *
+ * to_uobj must have a write get but the put mode switches to destroy once
+ * this is called.
+ */
+void rdma_assign_uobject(struct ib_uobject *to_uobj, struct ib_uobject *new_uobj,
+ struct uverbs_attr_bundle *attrs)
+{
+ assert_uverbs_usecnt(new_uobj, UVERBS_LOOKUP_WRITE);
+
+ if (WARN_ON(to_uobj->uapi_object != new_uobj->uapi_object ||
+ !to_uobj->uapi_object->type_class->swap_uobjects))
+ return;
+
+ to_uobj->uapi_object->type_class->swap_uobjects(to_uobj, new_uobj);
+
+ /*
+ * If this fails then the uobject is still completely valid (though with
+ * a new ID) and we leak it until context close.
+ */
+ uverbs_destroy_uobject(to_uobj, RDMA_REMOVE_DESTROY, attrs);
+}
+
+/*
* This consumes the kref for uobj. It is up to the caller to unwind the HW
* object and anything else connected to uobj before calling this.
*/
@@ -747,6 +797,7 @@ const struct uverbs_obj_type_class uverbs_idr_class = {
.lookup_put = lookup_put_idr_uobject,
.destroy_hw = destroy_hw_idr_uobject,
.remove_handle = remove_handle_idr_uobject,
+ .swap_uobjects = swap_idr_uobjects,
};
EXPORT_SYMBOL(uverbs_idr_class);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 817b25045acd..98a5d36813ff 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -764,11 +764,14 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_rereg_mr cmd;
struct ib_uverbs_rereg_mr_resp resp;
- struct ib_pd *pd = NULL;
struct ib_mr *mr;
- struct ib_pd *old_pd;
int ret;
struct ib_uobject *uobj;
+ struct ib_uobject *new_uobj;
+ struct ib_device *ib_dev;
+ struct ib_pd *orig_pd;
+ struct ib_pd *new_pd;
+ struct ib_mr *new_mr;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
@@ -801,31 +804,69 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
goto put_uobjs;
}
+ orig_pd = mr->pd;
if (cmd.flags & IB_MR_REREG_PD) {
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
- attrs);
- if (!pd) {
+ new_pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
+ attrs);
+ if (!new_pd) {
ret = -EINVAL;
goto put_uobjs;
}
+ } else {
+ new_pd = mr->pd;
}
- old_pd = mr->pd;
- ret = mr->device->ops.rereg_user_mr(mr, cmd.flags, cmd.start,
- cmd.length, cmd.hca_va,
- cmd.access_flags, pd,
- &attrs->driver_udata);
- if (ret)
+ /*
+ * The driver might create a new HW object as part of the rereg, we need
+ * to have a uobject ready to hold it.
+ */
+ new_uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev);
+ if (IS_ERR(new_uobj)) {
+ ret = PTR_ERR(new_uobj);
goto put_uobj_pd;
-
- if (cmd.flags & IB_MR_REREG_PD) {
- atomic_inc(&pd->usecnt);
- mr->pd = pd;
- atomic_dec(&old_pd->usecnt);
}
- if (cmd.flags & IB_MR_REREG_TRANS)
- mr->iova = cmd.hca_va;
+ new_mr = ib_dev->ops.rereg_user_mr(mr, cmd.flags, cmd.start, cmd.length,
+ cmd.hca_va, cmd.access_flags, new_pd,
+ &attrs->driver_udata);
+ if (IS_ERR(new_mr)) {
+ ret = PTR_ERR(new_mr);
+ goto put_new_uobj;
+ }
+ if (new_mr) {
+ new_mr->device = new_pd->device;
+ new_mr->pd = new_pd;
+ new_mr->type = IB_MR_TYPE_USER;
+ new_mr->dm = NULL;
+ new_mr->sig_attrs = NULL;
+ new_mr->uobject = uobj;
+ atomic_inc(&new_pd->usecnt);
+ new_mr->iova = cmd.hca_va;
+ new_uobj->object = new_mr;
+
+ rdma_restrack_new(&new_mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&new_mr->res, NULL);
+ rdma_restrack_add(&new_mr->res);
+
+ /*
+ * The new uobj for the new HW object is put into the same spot
+ * in the IDR and the old uobj & HW object is deleted.
+ */
+ rdma_assign_uobject(uobj, new_uobj, attrs);
+ rdma_alloc_commit_uobject(new_uobj, attrs);
+ uobj_put_destroy(uobj);
+ new_uobj = NULL;
+ uobj = NULL;
+ mr = new_mr;
+ } else {
+ if (cmd.flags & IB_MR_REREG_PD) {
+ atomic_dec(&orig_pd->usecnt);
+ mr->pd = new_pd;
+ atomic_inc(&new_pd->usecnt);
+ }
+ if (cmd.flags & IB_MR_REREG_TRANS)
+ mr->iova = cmd.hca_va;
+ }
memset(&resp, 0, sizeof(resp));
resp.lkey = mr->lkey;
@@ -833,12 +874,16 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
ret = uverbs_response(attrs, &resp, sizeof(resp));
+put_new_uobj:
+ if (new_uobj)
+ uobj_alloc_abort(new_uobj, attrs);
put_uobj_pd:
if (cmd.flags & IB_MR_REREG_PD)
- uobj_put_obj_read(pd);
+ uobj_put_obj_read(new_pd);
put_uobjs:
- uobj_put_write(uobj);
+ if (uobj)
+ uobj_put_write(uobj);
return ret;
}