From d5eb8373b2cee72a6f1f2157130b56e6a88894dc Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Sun, 14 Feb 2021 15:35:56 +0200 Subject: habanalabs: replace GFP_ATOMIC with GFP_KERNEL As there are incorrect assumptions in which some of the initialization and data path flows cannot sleep, most allocations are being done using GFP_ATOMIC. We modify the code to use GFP_ATOMIC only when realy needed, as sleepable flow should use GFP_KERNEL. In addition add a fallback to allocate memory using GFP_KERNEL, once ATOMIC allocation fails. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_buffer.c | 12 +++++++++--- .../misc/habanalabs/common/command_submission.c | 22 +++++++++++++++++++++- drivers/misc/habanalabs/common/device.c | 2 +- drivers/misc/habanalabs/common/memory.c | 10 +++------- drivers/misc/habanalabs/gaudi/gaudi.c | 2 +- drivers/misc/habanalabs/goya/goya.c | 2 +- 6 files changed, 36 insertions(+), 14 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index d9adb9a5e4d8..719168c980a4 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -181,7 +181,7 @@ static void cb_release(struct kref *ref) static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, int ctx_id, bool internal_cb) { - struct hl_cb *cb; + struct hl_cb *cb = NULL; u32 cb_offset; void *p; @@ -193,9 +193,10 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, * the kernel's copy. Hence, we must never sleep in this code section * and must use GFP_ATOMIC for all memory allocations. */ - if (ctx_id == HL_KERNEL_ASID_ID) + if (ctx_id == HL_KERNEL_ASID_ID && !hdev->disabled) cb = kzalloc(sizeof(*cb), GFP_ATOMIC); - else + + if (!cb) cb = kzalloc(sizeof(*cb), GFP_KERNEL); if (!cb) @@ -214,6 +215,9 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, } else if (ctx_id == HL_KERNEL_ASID_ID) { p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, GFP_ATOMIC); + if (!p) + p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, + cb_size, &cb->bus_address, GFP_KERNEL); } else { p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, @@ -310,6 +314,8 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, spin_lock(&mgr->cb_lock); rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC); + if (rc < 0) + rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_KERNEL); spin_unlock(&mgr->cb_lock); if (rc < 0) { diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index e1a441e2b2a2..20c3793feb27 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -638,6 +638,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cntr = &hdev->aggregated_cs_counters; cs = kzalloc(sizeof(*cs), GFP_ATOMIC); + if (!cs) + cs = kzalloc(sizeof(*cs), GFP_KERNEL); + if (!cs) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&cntr->out_of_mem_drop_cnt); @@ -657,6 +660,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, spin_lock_init(&cs->job_lock); cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC); + if (!cs_cmpl) + cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_KERNEL); + if (!cs_cmpl) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&cntr->out_of_mem_drop_cnt); @@ -698,6 +704,10 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); + if (!cs->jobs_in_queue_cnt) + cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, + sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL); + if (!cs->jobs_in_queue_cnt) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&cntr->out_of_mem_drop_cnt); @@ -927,6 +937,9 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, struct hl_cs_job *job; job = kzalloc(sizeof(*job), GFP_ATOMIC); + if (!job) + job = kzalloc(sizeof(*job), GFP_KERNEL); + if (!job) return NULL; @@ -1029,6 +1042,9 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev, *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array), GFP_ATOMIC); + if (!*cs_chunk_array) + *cs_chunk_array = kmalloc_array(num_chunks, + sizeof(**cs_chunk_array), GFP_KERNEL); if (!*cs_chunk_array) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); @@ -1483,6 +1499,10 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev, signal_seq_arr = kmalloc_array(signal_seq_arr_len, sizeof(*signal_seq_arr), GFP_ATOMIC); + if (!signal_seq_arr) + signal_seq_arr = kmalloc_array(signal_seq_arr_len, + sizeof(*signal_seq_arr), + GFP_KERNEL); if (!signal_seq_arr) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); @@ -1931,7 +1951,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, hl_ctx_get(hdev, ctx); - pend = kmalloc(sizeof(*pend), GFP_ATOMIC); + pend = kmalloc(sizeof(*pend), GFP_KERNEL); if (!pend) { hl_ctx_put(ctx); return -ENOMEM; diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 96b8f39c2539..c94e8ca6edf1 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -344,7 +344,7 @@ static int device_early_init(struct hl_device *hdev) if (hdev->asic_prop.completion_queues_count) { hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count, sizeof(*hdev->cq_wq), - GFP_ATOMIC); + GFP_KERNEL); if (!hdev->cq_wq) { rc = -ENOMEM; goto asid_fini; diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index c2679896aa50..1b69573369cf 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -145,7 +145,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, spin_lock(&vm->idr_lock); handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0, - GFP_ATOMIC); + GFP_KERNEL); spin_unlock(&vm->idr_lock); if (handle < 0) { @@ -1596,7 +1596,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size, rc = sg_alloc_table_from_pages(userptr->sgt, userptr->pages, - npages, offset, size, GFP_ATOMIC); + npages, offset, size, GFP_KERNEL); if (rc < 0) { dev_err(hdev->dev, "failed to create SG table from pages\n"); goto put_pages; @@ -1646,11 +1646,7 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, return -EINVAL; } - /* - * This function can be called also from data path, hence use atomic - * always as it is not a big allocation. - */ - userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC); + userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_KERNEL); if (!userptr->sgt) return -ENOMEM; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 4f39737b4808..eee9387c8a26 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -4873,7 +4873,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev, parser->job_userptr_list, &userptr)) goto already_pinned; - userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC); + userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); if (!userptr) return -ENOMEM; diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 20abed6ef7e6..1eaf9c06227c 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -3221,7 +3221,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev, parser->job_userptr_list, &userptr)) goto already_pinned; - userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC); + userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); if (!userptr) return -ENOMEM; -- cgit v1.2.3