From 0904f32796d4bb2d8102cd0056d8634f247ce45a Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 1 Jul 2016 16:00:55 -0700 Subject: IB/hfi1: Remove unnecessary done label in hfi1_write_iter Simple code clean up of hfi1_write_iter. Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) (limited to 'drivers/infiniband/hw/hfi1/file_ops.c') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index c702a009608f..2f097d942f9c 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -392,41 +392,38 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) struct hfi1_filedata *fd = kiocb->ki_filp->private_data; struct hfi1_user_sdma_pkt_q *pq = fd->pq; struct hfi1_user_sdma_comp_q *cq = fd->cq; - int ret = 0, done = 0, reqs = 0; + int done = 0, reqs = 0; unsigned long dim = from->nr_segs; - if (!cq || !pq) { - ret = -EIO; - goto done; - } + if (!cq || !pq) + return -EIO; - if (!iter_is_iovec(from) || !dim) { - ret = -EINVAL; - goto done; - } + if (!iter_is_iovec(from) || !dim) + return -EINVAL; hfi1_cdbg(SDMA, "SDMA request from %u:%u (%lu)", fd->uctxt->ctxt, fd->subctxt, dim); - if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) { - ret = -ENOSPC; - goto done; - } + if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) + return -ENOSPC; while (dim) { + int ret; unsigned long count = 0; ret = hfi1_user_sdma_process_request( kiocb->ki_filp, (struct iovec *)(from->iov + done), dim, &count); - if (ret) - goto done; + if (ret) { + reqs = ret; + break; + } dim -= count; done += count; reqs++; } -done: - return ret ? ret : reqs; + + return reqs; } static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) -- cgit v1.2.3 From b094a36f90975373c3a241839869217a65f17d81 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Mon, 25 Jul 2016 07:54:57 -0700 Subject: IB/hfi1: Refine user process affinity algorithm When performing process affinity recommendations for MPI ranks, the current algorithm doesn't take into account multiple HFI units. Also, real cores and HT cores are not distinguished from one another. Therefore, all HT cores are recommended to be assigned first within the local NUMA node before recommending the assignments of cores in other NUMA nodes. It's ideal to assign all real cores across all NUMA nodes first, then all HT 1 cores, then all HT 2 cores, and so on to balance CPU workload. CPU cores in other NUMA nodes could be running interrupt handlers, and this is not taken into account. To balance the CPU workload for user processes, the following recommendation algorithm is used: For each user process that is opening a context on HFI Y: a) If all cores are assigned to user processes, start assignments all over from the first core b) Assign real cores first, then HT cores (First set of HT cores on all physical cores, then second set of HT cores, and, so on) in the following order: 1. Same NUMA node as HFI Y and not running an IRQ handler 2. Same NUMA node as HFI Y and running an IRQ handler 3. Different NUMA node to HFI Y and not running an IRQ handler 4. Different NUMA node to HFI Y and running an IRQ handler c) Mark core as assigned in the global affinity structure. As user processes are done, remove core assignments from global affinity structure. This implementation allows an arbitrary number of HT cores and provides support for multiple HFIs. This is being included in the kernel rather than user space due to the fact that user space has no way of knowing the CPU recommendations for contexts running as part of other jobs. Reviewed-by: Ira Weiny Reviewed-by: Mitko Haralanov Reviewed-by: Dennis Dalessandro Signed-off-by: Sebastian Sanchez Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/hw/hfi1/file_ops.c') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 2f097d942f9c..d7c07bc7bd14 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -715,7 +715,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) hfi1_user_sdma_free_queues(fdata); /* release the cpu */ - hfi1_put_proc_affinity(dd, fdata->rec_cpu_num); + hfi1_put_proc_affinity(fdata->rec_cpu_num); /* * Clear any left over, unhandled events so the next process that @@ -815,9 +815,10 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) ret = find_shared_ctxt(fp, uinfo); if (ret < 0) goto done_unlock; - if (ret) - fd->rec_cpu_num = hfi1_get_proc_affinity( - fd->uctxt->dd, fd->uctxt->numa_id); + if (ret) { + fd->rec_cpu_num = + hfi1_get_proc_affinity(fd->uctxt->numa_id); + } } /* @@ -929,7 +930,11 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, if (ctxt == dd->num_rcv_contexts) return -EBUSY; - fd->rec_cpu_num = hfi1_get_proc_affinity(dd, -1); + /* + * If we don't have a NUMA node requested, preference is towards + * device NUMA node. + */ + fd->rec_cpu_num = hfi1_get_proc_affinity(dd->node); if (fd->rec_cpu_num != -1) numa = cpu_to_node(fd->rec_cpu_num); else -- cgit v1.2.3 From 8e1f52df978ec17475e1184ed9f72078babcbbfa Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Thu, 28 Jul 2016 12:27:26 -0400 Subject: IB/hfi1: Remove unused uctxt->subpid and uctxt->pid These are no longer needed. Reviewed-by: Ira Weiny Signed-off-by: Dean Luick Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/infiniband/hw/hfi1/file_ops.c') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index d7c07bc7bd14..b80c8d2ac52b 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -727,7 +727,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) if (--uctxt->cnt) { uctxt->active_slaves &= ~(1 << fdata->subctxt); - uctxt->subpid[fdata->subctxt] = 0; mutex_unlock(&hfi1_mutex); goto done; } @@ -753,7 +752,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE, hfi1_pkt_default_send_ctxt_mask(dd, uctxt->sc->type)); sc_disable(uctxt->sc); - uctxt->pid = 0; spin_unlock_irqrestore(&dd->uctxt_lock, flags); dd->rcd[uctxt->ctxt] = NULL; @@ -893,7 +891,6 @@ static int find_shared_ctxt(struct file *fp, } fd->uctxt = uctxt; fd->subctxt = uctxt->cnt++; - uctxt->subpid[fd->subctxt] = current->pid; uctxt->active_slaves |= 1 << fd->subctxt; ret = 1; goto done; @@ -978,7 +975,6 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, return ret; } uctxt->userversion = uinfo->userversion; - uctxt->pid = current->pid; uctxt->flags = HFI1_CAP_UGET(MASK); init_waitqueue_head(&uctxt->wait); strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm)); -- cgit v1.2.3 From ea3a0ee52db0c2ec8d1d0ecdd21e650e6e183085 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 28 Jul 2016 12:27:35 -0400 Subject: IB/hfi1: Restructure hfi1_file_open Rearrange the file open call in prep for new changes. Reviewed-by: Dean Luick Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw/hfi1/file_ops.c') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index b80c8d2ac52b..0522bafb190b 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -168,6 +168,7 @@ static inline int is_valid_mmap(u64 token) static int hfi1_file_open(struct inode *inode, struct file *fp) { + struct hfi1_filedata *fd; struct hfi1_devdata *dd = container_of(inode->i_cdev, struct hfi1_devdata, user_cdev); @@ -176,10 +177,15 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) kobject_get(&dd->kobj); /* The real work is performed later in assign_ctxt() */ - fp->private_data = kzalloc(sizeof(struct hfi1_filedata), GFP_KERNEL); - if (fp->private_data) /* no cpu affinity by default */ - ((struct hfi1_filedata *)fp->private_data)->rec_cpu_num = -1; - return fp->private_data ? 0 : -ENOMEM; + + fd = kzalloc(sizeof(*fd), GFP_KERNEL); + + if (fd) /* no cpu affinity by default */ + fd->rec_cpu_num = -1; + + fp->private_data = fd; + + return fd ? 0 : -ENOMEM; } static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, -- cgit v1.2.3 From bdf7752e072f91fbeb1739da3938d4392ea8a51f Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Thu, 28 Jul 2016 15:21:13 -0400 Subject: IB/hfi1: Use the same capability state for all shared contexts Save the current capability state at user context creation time. Report this saved value for all shared contexts. Also get rid of unnecessary hfi1_get_base_kinfo function. Reviewed-by: Ira Weiny Signed-off-by: Dean Luick Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband/hw/hfi1/file_ops.c') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 0522bafb190b..1f4cd5aa2071 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -981,7 +981,7 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, return ret; } uctxt->userversion = uinfo->userversion; - uctxt->flags = HFI1_CAP_UGET(MASK); + uctxt->flags = hfi1_cap_mask; /* save current flag state */ init_waitqueue_head(&uctxt->wait); strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm)); memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)); @@ -1084,18 +1084,18 @@ static int user_init(struct file *fp) hfi1_set_ctxt_jkey(uctxt->dd, uctxt->ctxt, uctxt->jkey); rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; - if (HFI1_CAP_KGET_MASK(uctxt->flags, HDRSUPP)) + if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP)) rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB; /* * Ignore the bit in the flags for now until proper * support for multiple packet per rcv array entry is * added. */ - if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR)) + if (!HFI1_CAP_UGET_MASK(uctxt->flags, MULTI_PKT_EGR)) rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB; - if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL)) + if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_EGR_FULL)) rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; - if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) + if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; /* * The RcvCtxtCtrl.TailUpd bit has to be explicitly written. @@ -1103,7 +1103,7 @@ static int user_init(struct file *fp) * uses of the chip or ctxt. Therefore, add the rcvctrl op * for both cases. */ - if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL)) + if (HFI1_CAP_UGET_MASK(uctxt->flags, DMA_RTAIL)) rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; else rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS; @@ -1126,9 +1126,10 @@ static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len) int ret = 0; memset(&cinfo, 0, sizeof(cinfo)); - ret = hfi1_get_base_kinfo(uctxt, &cinfo); - if (ret < 0) - goto done; + cinfo.runtime_flags = (((uctxt->flags >> HFI1_CAP_MISC_SHIFT) & + HFI1_CAP_MISC_MASK) << HFI1_CAP_USER_SHIFT) | + HFI1_CAP_UGET_MASK(uctxt->flags, MASK) | + HFI1_CAP_KGET_MASK(uctxt->flags, K2U); cinfo.num_active = hfi1_count_active_units(); cinfo.unit = uctxt->dd->unit; cinfo.ctxt = uctxt->ctxt; @@ -1150,7 +1151,7 @@ static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len) trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo); if (copy_to_user(ubase, &cinfo, sizeof(cinfo))) ret = -EFAULT; -done: + return ret; } -- cgit v1.2.3 From 3faa3d9a308e539cc48355b1f419a5ed9f8274a2 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 28 Jul 2016 15:21:19 -0400 Subject: IB/hfi1: Make use of mm consistent The hfi1 driver registers a mmu_notifier callback when /dev/hfi1_* is opened, and unregisters it when the device is closed. The driver incorrectly assumes that the close will always happen from the same context as the open. In particular, closes due to SIGKILL or OOM killer activity may happen from a different context. In these cases, the wrong mm is passed to mmu_notifier_unregister(), which causes improper reference counting for the victim mm, and eventual memory corruption. Preserve the mm for all open file descriptors and use this mm rather than current->mm for memory operations for the lifetime of that fd. Note: this patch leaves 1 use of current->mm in place. This use is removed in a follow on patch because other functional changes were required prior to that use being removed. If registration fails, there is no reason to keep the handler object around. Free the handler object rather than add it to the list to prevent any mmu_notifier operations, including unregister, when registration fails. Suggested-by: Jim Foraker Reviewed-by: Dean Luick Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw/hfi1/file_ops.c') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 1f4cd5aa2071..302f0cdd8119 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -180,8 +180,10 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) fd = kzalloc(sizeof(*fd), GFP_KERNEL); - if (fd) /* no cpu affinity by default */ - fd->rec_cpu_num = -1; + if (fd) { + fd->rec_cpu_num = -1; /* no cpu affinity by default */ + fd->mm = current->mm; + } fp->private_data = fd; -- cgit v1.2.3 From 622c202c4a4697636334761d7ca295ebd35074e4 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Thu, 28 Jul 2016 15:21:21 -0400 Subject: IB/hfi1: Fix TID caching actions Per file descriptor TID caching actions depend on a global that can change midway through the lifetime of that file descriptor. Make the use of caching consistent for the life of the file descriptor by using the presence of the cache handler to decide when to use the cache functions. Reviewed-by: Ira Weiny Signed-off-by: Dean Luick Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/infiniband/hw/hfi1/file_ops.c') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 302f0cdd8119..4f39bffad74a 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -1132,6 +1132,10 @@ static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len) HFI1_CAP_MISC_MASK) << HFI1_CAP_USER_SHIFT) | HFI1_CAP_UGET_MASK(uctxt->flags, MASK) | HFI1_CAP_KGET_MASK(uctxt->flags, K2U); + /* adjust flag if this fd is not able to cache */ + if (!fd->handler) + cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */ + cinfo.num_active = hfi1_count_active_units(); cinfo.unit = uctxt->dd->unit; cinfo.ctxt = uctxt->ctxt; -- cgit v1.2.3