Diffstat (limited to 'drivers/misc/habanalabs')
-rw-r--r--  drivers/misc/habanalabs/goya/goya.c                     | 190
-rw-r--r--  drivers/misc/habanalabs/goya/goyaP.h                    |  29
-rw-r--r--  drivers/misc/habanalabs/habanalabs.h                    |  12
-rw-r--r--  drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h |   1
-rw-r--r--  drivers/misc/habanalabs/memory.c                        |  12
-rw-r--r--  drivers/misc/habanalabs/mmu.c                           | 285
6 files changed, 361 insertions(+), 168 deletions(-)
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 447d907bddf3..7c2edabe20bd 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -304,6 +304,7 @@ static u32 goya_non_fatal_events[GOYA_ASYC_EVENT_GROUP_NON_FATAL_SIZE] = {
 static int goya_armcp_info_get(struct hl_device *hdev);
 static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
 static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
+static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
 static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
 					u64 phys_addr);
 
@@ -345,6 +346,7 @@ static void goya_get_fixed_properties(struct hl_device *hdev)
 			SRAM_USER_BASE_OFFSET;
 
 	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
+	prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR;
 	if (hdev->pldm)
 		prop->mmu_pgt_size = 0x800000; /* 8MB */
 	else
@@ -359,6 +361,8 @@ static void goya_get_fixed_properties(struct hl_device *hdev)
 	prop->va_space_host_end_address = VA_HOST_SPACE_END;
 	prop->va_space_dram_start_address = VA_DDR_SPACE_START;
 	prop->va_space_dram_end_address = VA_DDR_SPACE_END;
+	prop->dram_size_for_default_page_mapping =
+			prop->va_space_dram_end_address;
 	prop->cfg_size = CFG_SIZE;
 	prop->max_asid = MAX_ASID;
 	prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
@@ -816,6 +820,12 @@ static int goya_late_init(struct hl_device *hdev)
 		goto disable_pci_access;
 	}
 
+	rc = goya_mmu_set_dram_default_page(hdev);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to set DRAM default page\n");
+		goto disable_pci_access;
+	}
+
 	return 0;
 
 disable_pci_access:
@@ -2648,6 +2658,7 @@ static int goya_mmu_init(struct hl_device *hdev)
 		return 0;
 
 	hdev->dram_supports_virtual_memory = true;
+	hdev->dram_default_page_mapping = true;
 
 	for (i = 0 ; i < prop->max_asid ; i++) {
 		hop0_addr = prop->mmu_pgt_addr +
@@ -4303,98 +4314,6 @@ static void goya_update_eq_ci(struct hl_device *hdev, u32 val)
 	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, val);
 }
 
-static int goya_context_switch(struct hl_device *hdev, u32 asid)
-{
-	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	struct packet_lin_dma *clear_sram_pkt;
-	struct hl_cs_parser parser;
-	struct hl_cs_job *job;
-	u32 cb_size;
-	struct hl_cb *cb;
-	int rc;
-
-	cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
-	if (!cb)
-		return -EFAULT;
-
-	clear_sram_pkt = (struct packet_lin_dma *)
-			(uintptr_t) cb->kernel_address;
-
-	memset(clear_sram_pkt, 0, sizeof(*clear_sram_pkt));
-	cb_size = sizeof(*clear_sram_pkt);
-
-	clear_sram_pkt->ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
-			(DMA_HOST_TO_SRAM << GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT) |
-			(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
-			(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
-			(1 << GOYA_PKT_CTL_RB_SHIFT) |
-			(1 << GOYA_PKT_CTL_MB_SHIFT));
-
-	clear_sram_pkt->src_addr = 0x7777777777777777ull;
-	clear_sram_pkt->dst_addr = prop->sram_base_address;
-	if (hdev->pldm)
-		clear_sram_pkt->tsize = 0x10000;
-	else
-		clear_sram_pkt->tsize = prop->sram_size;
-
-	job = hl_cs_allocate_job(hdev, true);
-	if (!job) {
-		dev_err(hdev->dev, "Failed to allocate a new job\n");
-		rc = -ENOMEM;
-		goto release_cb;
-	}
-
-	job->id = 0;
-	job->user_cb = cb;
-	job->user_cb->cs_cnt++;
-	job->user_cb_size = cb_size;
-	job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
-
-	hl_debugfs_add_job(hdev, job);
-
-	parser.ctx_id = HL_KERNEL_ASID_ID;
-	parser.cs_sequence = 0;
-	parser.job_id = job->id;
-	parser.hw_queue_id = job->hw_queue_id;
-	parser.job_userptr_list = &job->userptr_list;
-	parser.user_cb = job->user_cb;
-	parser.user_cb_size = job->user_cb_size;
-	parser.ext_queue = job->ext_queue;
-	parser.use_virt_addr = hdev->mmu_enable;
-
-	rc = hdev->asic_funcs->cs_parser(hdev, &parser);
-	if (rc) {
-		dev_err(hdev->dev,
-			"Failed to parse kernel CB during context switch\n");
-		goto free_job;
-	}
-
-	job->patched_cb = parser.patched_cb;
-	job->job_cb_size = parser.patched_cb_size;
-	job->patched_cb->cs_cnt++;
-
-	rc = goya_send_job_on_qman0(hdev, job);
-
-	/* no point in setting the asid in case of failure */
-	if (!rc)
-		goya_mmu_prepare(hdev, asid);
-
-	job->patched_cb->cs_cnt--;
-	hl_cb_put(job->patched_cb);
-
-free_job:
-	hl_userptr_delete_list(hdev, &job->userptr_list);
-	hl_debugfs_remove_job(hdev, job);
-	kfree(job);
-	cb->cs_cnt--;
-
-release_cb:
-	hl_cb_put(cb);
-	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
-
-	return rc;
-}
-
 static void goya_restore_phase_topology(struct hl_device *hdev)
 {
 	int i, num_of_sob_in_longs, num_of_mon_in_longs;
@@ -4864,41 +4783,37 @@ void *goya_get_events_stat(struct hl_device *hdev, u32 *size)
 	return goya->events_stat;
 }
 
-static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
+static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u32 size,
+				u64 val, bool is_dram)
 {
-	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	struct goya_device *goya = hdev->asic_specific;
-	struct packet_lin_dma *clear_pgt_range_pkt;
+	struct packet_lin_dma *lin_dma_pkt;
 	struct hl_cs_parser parser;
 	struct hl_cs_job *job;
 	u32 cb_size;
 	struct hl_cb *cb;
 	int rc;
 
-	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
-		return 0;
-
 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
 	if (!cb)
 		return -EFAULT;
 
-	clear_pgt_range_pkt = (struct packet_lin_dma *)
-			(uintptr_t) cb->kernel_address;
+	lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;
+
+	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
+	cb_size = sizeof(*lin_dma_pkt);
 
-	memset(clear_pgt_range_pkt, 0, sizeof(*clear_pgt_range_pkt));
-	cb_size = sizeof(*clear_pgt_range_pkt);
+	lin_dma_pkt->ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
+			(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
+			(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
+			(1 << GOYA_PKT_CTL_RB_SHIFT) |
+			(1 << GOYA_PKT_CTL_MB_SHIFT));
 
-	clear_pgt_range_pkt->ctl =
-		((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
-		(DMA_HOST_TO_DRAM << GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT) |
-		(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
-		(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
-		(1 << GOYA_PKT_CTL_RB_SHIFT) |
-		(1 << GOYA_PKT_CTL_MB_SHIFT));
+	lin_dma_pkt->ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
+			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
 
-	clear_pgt_range_pkt->src_addr = 0;
-	clear_pgt_range_pkt->dst_addr = prop->mmu_pgt_addr;
-	clear_pgt_range_pkt->tsize = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
+	lin_dma_pkt->src_addr = val;
+	lin_dma_pkt->dst_addr = addr;
+	lin_dma_pkt->tsize = size;
 
 	job = hl_cs_allocate_job(hdev, true);
 	if (!job) {
@@ -4927,8 +4842,7 @@ static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
 
 	rc = hdev->asic_funcs->cs_parser(hdev, &parser);
 	if (rc) {
-		dev_err(hdev->dev,
-			"Failed to parse kernel CB when clearing pgt\n");
+		dev_err(hdev->dev, "Failed to parse kernel CB\n");
 		goto free_job;
 	}
 
@@ -4954,6 +4868,52 @@ release_cb:
 	return rc;
 }
 
+static int goya_context_switch(struct hl_device *hdev, u32 asid)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	u64 addr = prop->sram_base_address;
+	u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
+	u64 val = 0x7777777777777777ull;
+	int rc;
+
+	rc = goya_memset_device_memory(hdev, addr, size, val, false);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
+		return rc;
+	}
+
+	goya_mmu_prepare(hdev, asid);
+
+	return 0;
+}
+
+static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct goya_device *goya = hdev->asic_specific;
+	u64 addr = prop->mmu_pgt_addr;
+	u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
+			MMU_CACHE_MNG_SIZE;
+
+	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+		return 0;
+
+	return goya_memset_device_memory(hdev, addr, size, 0, true);
+}
+
+static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
+{
+	struct goya_device *goya = hdev->asic_specific;
+	u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
+	u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE;
+	u64 val = 0x9999999999999999ull;
+
+	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+		return 0;
+
+	return goya_memset_device_memory(hdev, addr, size, val, true);
+}
+
 static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
 {
 	struct goya_device *goya = hdev->asic_specific;
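The goya.c changes above fold three nearly identical LIN_DMA memset flows (SRAM clear on context switch, page-table clear, default-page fill) into the single goya_memset_device_memory() helper. Below is a standalone toy model of the resulting call pattern; the helper stub and the base addresses are placeholders, only the fill patterns, sizes and the is_dram flag come from the patch:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for the helper added above: the real function builds a
 * LIN_DMA memset packet and submits it on QMAN0; here we only log it. */
static int goya_memset_device_memory(uint64_t addr, uint32_t size,
				     uint64_t val, bool is_dram)
{
	printf("LIN_DMA memset: %s addr=0x%llx size=0x%x val=0x%llx\n",
	       is_dram ? "DRAM" : "SRAM", (unsigned long long)addr,
	       (unsigned)size, (unsigned long long)val);
	return 0;
}

int main(void)
{
	/* placeholder base addresses, not real Goya values */
	uint64_t sram_base = 0x7ff0000000ULL;
	uint64_t pgt_addr = 0x10000000ULL;
	uint64_t dram_default_page_addr = 0x1de00000ULL;

	/* context switch: fill SRAM with the 0x77 pattern */
	goya_memset_device_memory(sram_base, 0x10000,
				  0x7777777777777777ULL, false);
	/* MMU init: zero page tables + default page + cache mng area */
	goya_memset_device_memory(pgt_addr, 0x0E001000, 0, true);
	/* late init: fill the DRAM default page with the 0x99 pattern */
	goya_memset_device_memory(dram_default_page_addr, 0x200000,
				  0x9999999999999999ULL, true);
	return 0;
}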
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index 0631bc133cce..830551b6b062 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -56,18 +56,23 @@
 
 /* DRAM Memory Map */
 
-#define CPU_FW_IMAGE_SIZE	0x10000000	/* 256MB */
-#define MMU_PAGE_TABLES_SIZE	0x0E000000	/* 224MB */
-#define MMU_CACHE_MNG_SIZE	0x00001000	/* 4KB */
-#define CPU_PQ_PKT_SIZE		0x00001000	/* 4KB */
-#define CPU_PQ_DATA_SIZE	0x01FFE000	/* 32MB - 8KB */
-
-#define CPU_FW_IMAGE_ADDR	DRAM_PHYS_BASE
-#define MMU_PAGE_TABLES_ADDR	(CPU_FW_IMAGE_ADDR + CPU_FW_IMAGE_SIZE)
-#define MMU_CACHE_MNG_ADDR	(MMU_PAGE_TABLES_ADDR + MMU_PAGE_TABLES_SIZE)
-#define CPU_PQ_PKT_ADDR		(MMU_CACHE_MNG_ADDR + MMU_CACHE_MNG_SIZE)
-#define CPU_PQ_DATA_ADDR	(CPU_PQ_PKT_ADDR + CPU_PQ_PKT_SIZE)
-#define DRAM_BASE_ADDR_USER	(CPU_PQ_DATA_ADDR + CPU_PQ_DATA_SIZE)
+#define CPU_FW_IMAGE_SIZE		0x10000000	/* 256MB */
+#define MMU_PAGE_TABLES_SIZE		0x0DE00000	/* 222MB */
+#define MMU_DRAM_DEFAULT_PAGE_SIZE	0x00200000	/* 2MB */
+#define MMU_CACHE_MNG_SIZE		0x00001000	/* 4KB */
+#define CPU_PQ_PKT_SIZE			0x00001000	/* 4KB */
+#define CPU_PQ_DATA_SIZE		0x01FFE000	/* 32MB - 8KB */
+
+#define CPU_FW_IMAGE_ADDR		DRAM_PHYS_BASE
+#define MMU_PAGE_TABLES_ADDR		(CPU_FW_IMAGE_ADDR + CPU_FW_IMAGE_SIZE)
+#define MMU_DRAM_DEFAULT_PAGE_ADDR	(MMU_PAGE_TABLES_ADDR + \
+						MMU_PAGE_TABLES_SIZE)
+#define MMU_CACHE_MNG_ADDR		(MMU_DRAM_DEFAULT_PAGE_ADDR + \
+						MMU_DRAM_DEFAULT_PAGE_SIZE)
+#define CPU_PQ_PKT_ADDR			(MMU_CACHE_MNG_ADDR + \
+						MMU_CACHE_MNG_SIZE)
+#define CPU_PQ_DATA_ADDR		(CPU_PQ_PKT_ADDR + CPU_PQ_PKT_SIZE)
+#define DRAM_BASE_ADDR_USER		(CPU_PQ_DATA_ADDR + CPU_PQ_DATA_SIZE)
 
 #if (DRAM_BASE_ADDR_USER != 0x20000000)
 #error "KMD must reserve 512MB"
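The reworked DRAM map carves the new 2MB default page out of the old 224MB page-tables region (224MB = 222MB + 2MB), so the total reserved area is unchanged. A quick standalone check, with the sizes copied from the hunk above, that everything still sums to the 512MB enforced by the #error guard:

#include <assert.h>

int main(void)
{
	unsigned long long total = 0x10000000ULL /* CPU_FW_IMAGE_SIZE, 256MB */
			+ 0x0DE00000ULL	/* MMU_PAGE_TABLES_SIZE, 222MB */
			+ 0x00200000ULL	/* MMU_DRAM_DEFAULT_PAGE_SIZE, 2MB */
			+ 0x00001000ULL	/* MMU_CACHE_MNG_SIZE, 4KB */
			+ 0x00001000ULL	/* CPU_PQ_PKT_SIZE, 4KB */
			+ 0x01FFE000ULL;	/* CPU_PQ_DATA_SIZE, 32MB - 8KB */

	/* DRAM_BASE_ADDR_USER must land exactly at 0x20000000 (512MB) */
	assert(total == 0x20000000ULL);
	return 0;
}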
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index ee29971822c6..59b25c6fae00 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -143,7 +143,10 @@ enum hl_device_hw_state {
  *                              mapping DRAM memory.
  * @va_space_dram_end_address: end address of virtual memory range for
  *                             mapping DRAM memory.
+ * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
+ *                                      fault.
  * @mmu_pgt_addr: base physical address in DRAM of MMU page tables.
+ * @mmu_dram_default_page_addr: DRAM default page physical address.
  * @mmu_pgt_size: MMU page tables total size.
  * @mmu_pte_size: PTE size in MMU page tables.
  * @mmu_hop_table_size: MMU hop table size.
@@ -182,7 +185,9 @@ struct asic_fixed_properties {
 	u64			va_space_host_end_address;
 	u64			va_space_dram_start_address;
 	u64			va_space_dram_end_address;
+	u64			dram_size_for_default_page_mapping;
 	u64			mmu_pgt_addr;
+	u64			mmu_dram_default_page_addr;
 	u32			mmu_pgt_size;
 	u32			mmu_pte_size;
 	u32			mmu_hop_table_size;
@@ -592,6 +597,8 @@ struct hl_va_range {
  * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
  *			to user so user could inquire about CS. It is used as
  *			index to cs_pending array.
+ * @dram_default_hops: array that holds all hops addresses needed for default
+ *                     DRAM mapping.
  * @cs_lock: spinlock to protect cs_sequence.
  * @dram_phys_mem: amount of used physical DRAM memory by this context.
  * @thread_restore_token: token to prevent multiple threads of the same context
@@ -615,6 +622,7 @@ struct hl_ctx {
 	struct mutex		mmu_lock;
 	struct list_head	debugfs_list;
 	u64			cs_sequence;
+	u64			*dram_default_hops;
 	spinlock_t		cs_lock;
 	atomic64_t		dram_phys_mem;
 	atomic_t		thread_restore_token;
@@ -1068,6 +1076,7 @@ struct hl_device_reset_work {
 * @reset_on_lockup: true if a reset should be done in case of stuck CS, false
 *                   otherwise.
 * @dram_supports_virtual_memory: is MMU enabled towards DRAM.
+ * @dram_default_page_mapping: is DRAM default page mapping enabled.
 * @init_done: is the initialization of the device done.
 * @mmu_enable: is MMU enabled.
 */
@@ -1135,6 +1144,7 @@ struct hl_device {
 	u8			heartbeat;
 	u8			reset_on_lockup;
 	u8			dram_supports_virtual_memory;
+	u8			dram_default_page_mapping;
 	u8			init_done;
 
 	/* Parameters for bring-up */
@@ -1329,7 +1339,7 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
 
 int hl_mmu_init(struct hl_device *hdev);
 void hl_mmu_fini(struct hl_device *hdev);
-void hl_mmu_ctx_init(struct hl_ctx *ctx);
+int hl_mmu_ctx_init(struct hl_ctx *ctx);
 void hl_mmu_ctx_fini(struct hl_ctx *ctx);
 int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size);
 int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size);
diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
index 1bc36aba1426..b680052ee3f0 100644
--- a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
+++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
@@ -36,6 +36,7 @@
 
 #define HL_PTE_SIZE			sizeof(u64)
 #define HOP_TABLE_SIZE			PAGE_SIZE_4KB
+#define PTE_ENTRIES_IN_HOP		(HOP_TABLE_SIZE / HL_PTE_SIZE)
 #define HOP0_TABLES_TOTAL_SIZE		(HOP_TABLE_SIZE * MAX_ASID)
 
 #define MMU_HOP0_PA43_12_SHIFT		12
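With 4KB hop tables and 8-byte PTEs, the new PTE_ENTRIES_IN_HOP works out to 4096 / 8 = 512, so one fully populated hop3 table of huge-page PTEs covers 512 * 2MB = 1GB of DRAM virtual space. A standalone sketch of the num_of_hop3 arithmetic used by hl_mmu_ctx_init() in mmu.c further below; the 2MB dram_page_size is an assumption inferred from the PAGE_SIZE_2MB checks in the patch:

#include <assert.h>

#define HL_PTE_SIZE		8ULL		/* sizeof(u64) */
#define HOP_TABLE_SIZE		4096ULL		/* PAGE_SIZE_4KB */
#define PTE_ENTRIES_IN_HOP	(HOP_TABLE_SIZE / HL_PTE_SIZE)	/* 512 */
#define DRAM_PAGE_SIZE		0x200000ULL	/* 2MB, assumed */

int main(void)
{
	/* coverage of one hop3 table: 512 huge PTEs * 2MB = 1GB */
	assert(PTE_ENTRIES_IN_HOP * DRAM_PAGE_SIZE == 0x40000000ULL);

	/* e.g. default-mapping an 8GB DRAM VA range needs 8 hop3 tables,
	 * plus the single shared hop1 and hop2 tables */
	unsigned long long size = 8ULL << 30;
	unsigned long long num_of_hop3 =
			(size / DRAM_PAGE_SIZE) / PTE_ENTRIES_IN_HOP;
	assert(num_of_hop3 == 8);
	return 0;
}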
diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c
index 660cf67258fd..3a12fd1a5274 100644
--- a/drivers/misc/habanalabs/memory.c
+++ b/drivers/misc/habanalabs/memory.c
@@ -925,8 +925,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 		goto map_err;
 	}
 
-	hdev->asic_funcs->mmu_invalidate_cache_range(hdev, false, ctx->asid,
-			ret_vaddr, phys_pg_pack->total_size);
+	hdev->asic_funcs->mmu_invalidate_cache(hdev, false);
 
 	mutex_unlock(&ctx->mmu_lock);
 
@@ -1050,8 +1049,7 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr)
 		dev_warn_ratelimited(hdev->dev,
 				"unmap failed for vaddr: 0x%llx\n", next_vaddr);
 
-	hdev->asic_funcs->mmu_invalidate_cache_range(hdev, true, ctx->asid,
-			vaddr, phys_pg_pack->total_size);
+	hdev->asic_funcs->mmu_invalidate_cache(hdev, true);
 
 	mutex_unlock(&ctx->mmu_lock);
 
@@ -1455,7 +1453,11 @@ static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start,
 	struct hl_device *hdev = ctx->hdev;
 	int rc;
 
-	hl_mmu_ctx_init(ctx);
+	rc = hl_mmu_ctx_init(ctx);
+	if (rc) {
+		dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
+		return rc;
+	}
 
 	mutex_init(&ctx->mem_hash_lock);
 	hash_init(ctx->mem_hash);
diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c
index 79c70d92e74b..a7187f9a5948 100644
--- a/drivers/misc/habanalabs/mmu.c
+++ b/drivers/misc/habanalabs/mmu.c
@@ -151,7 +151,7 @@ static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
 
 	if (hop_addr == ULLONG_MAX) {
 		hop_addr = alloc_hop(ctx);
-		*is_new_hop = true;
+		*is_new_hop = (hop_addr != ULLONG_MAX);
 	}
 
 	return hop_addr;
@@ -234,22 +234,122 @@ void hl_mmu_fini(struct hl_device *hdev)
 	/* MMU HW fini will be done in device hw_fini() */
 }
 
-/*
- * hl_mmu_ctx_init - init a ctx for using the mmu module
- *
- * @ctx: pointer to the context structure
+/**
+ * hl_mmu_ctx_init() - initialize a context for using the MMU module.
+ * @ctx: pointer to the context structure to initialize.
  *
- * This function does the following:
- * - Init a mutex to protect the concurrent mapping flow
- * - Init a hash to hold all pgts related to this ctx
+ * Initialize a mutex to protect the concurrent mapping flow, a hash to hold all
+ * page tables hops related to this context and an optional DRAM default page
+ * mapping.
+ * Return: 0 on success, non-zero otherwise.
  */
-void hl_mmu_ctx_init(struct hl_ctx *ctx)
+int hl_mmu_ctx_init(struct hl_ctx *ctx)
 {
-	if (!ctx->hdev->mmu_enable)
-		return;
+	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	u64 num_of_hop3, total_hops, hop1_addr, hop2_addr, hop2_pte_addr,
+		hop3_pte_addr, pte_val;
+	int rc, i, j, hop3_allocated = 0;
+
+	if (!hdev->mmu_enable)
+		return 0;
 
 	mutex_init(&ctx->mmu_lock);
 	hash_init(ctx->mmu_hash);
+
+	if (!hdev->dram_supports_virtual_memory ||
+			!hdev->dram_default_page_mapping)
+		return 0;
+
+	num_of_hop3 = (prop->dram_size_for_default_page_mapping /
+			prop->dram_page_size) /
+			PTE_ENTRIES_IN_HOP;
+
+	/* add hop1 and hop2 */
+	total_hops = num_of_hop3 + 2;
+
+	ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
+	if (!ctx->dram_default_hops) {
+		rc = -ENOMEM;
+		goto alloc_err;
+	}
+
+	hop1_addr = alloc_hop(ctx);
+	if (hop1_addr == ULLONG_MAX) {
+		dev_err(hdev->dev, "failed to alloc hop 1\n");
+		rc = -ENOMEM;
+		goto hop1_err;
+	}
+
+	ctx->dram_default_hops[total_hops - 1] = hop1_addr;
+
+	hop2_addr = alloc_hop(ctx);
+	if (hop2_addr == ULLONG_MAX) {
+		dev_err(hdev->dev, "failed to alloc hop 2\n");
+		rc = -ENOMEM;
+		goto hop2_err;
+	}
+
+	ctx->dram_default_hops[total_hops - 2] = hop2_addr;
+
+	for (i = 0 ; i < num_of_hop3 ; i++) {
+		ctx->dram_default_hops[i] = alloc_hop(ctx);
+		if (ctx->dram_default_hops[i] == ULLONG_MAX) {
+			dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
+			rc = -ENOMEM;
+			goto hop3_err;
+		}
+		hop3_allocated++;
+	}
+
+	/* need only pte 0 in hops 0 and 1 */
+	pte_val = (hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+	hdev->asic_funcs->write_pte(hdev, get_hop0_addr(ctx), pte_val);
+
+	pte_val = (hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+	hdev->asic_funcs->write_pte(hdev, hop1_addr, pte_val);
+	get_pte(ctx, hop1_addr);
+
+	hop2_pte_addr = hop2_addr;
+	for (i = 0 ; i < num_of_hop3 ; i++) {
+		pte_val = (ctx->dram_default_hops[i] & PTE_PHYS_ADDR_MASK) |
+				PAGE_PRESENT_MASK;
+		hdev->asic_funcs->write_pte(hdev, hop2_pte_addr, pte_val);
+		get_pte(ctx, hop2_addr);
+		hop2_pte_addr += HL_PTE_SIZE;
+	}
+
+	pte_val = (prop->mmu_dram_default_page_addr & PTE_PHYS_ADDR_MASK) |
+			LAST_MASK | PAGE_PRESENT_MASK;
+
+	for (i = 0 ; i < num_of_hop3 ; i++) {
+		hop3_pte_addr = ctx->dram_default_hops[i];
+		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
+			hdev->asic_funcs->write_pte(hdev, hop3_pte_addr,
+					pte_val);
+			get_pte(ctx, ctx->dram_default_hops[i]);
+			hop3_pte_addr += HL_PTE_SIZE;
+		}
+	}
+
+	/* flush all writes to reach PCI */
+	mb();
+	hdev->asic_funcs->read_pte(hdev, hop2_addr);
+
+	return 0;
+
+hop3_err:
+	for (i = 0 ; i < hop3_allocated ; i++)
+		free_hop(ctx, ctx->dram_default_hops[i]);
+	free_hop(ctx, hop2_addr);
+hop2_err:
+	free_hop(ctx, hop1_addr);
+hop1_err:
+	kfree(ctx->dram_default_hops);
+alloc_err:
+	mutex_destroy(&ctx->mmu_lock);
+
+	return rc;
 }
 
 /*
@@ -260,22 +360,65 @@ void hl_mmu_ctx_init(struct hl_ctx *ctx)
  * This function does the following:
  * - Free any pgts which were not freed yet
  * - Free the mutex
+ * - Free DRAM default page mapping hops
  */
 void hl_mmu_ctx_fini(struct hl_ctx *ctx)
 {
+	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct pgt_info *pgt_info;
 	struct hlist_node *tmp;
-	int i;
+	u64 num_of_hop3, total_hops, hop1_addr, hop2_addr, hop2_pte_addr,
+		hop3_pte_addr;
+	int i, j;
 
 	if (!ctx->hdev->mmu_enable)
 		return;
 
+	if (hdev->dram_supports_virtual_memory &&
+			hdev->dram_default_page_mapping) {
+
+		num_of_hop3 = (prop->dram_size_for_default_page_mapping /
+				prop->dram_page_size) /
+				PTE_ENTRIES_IN_HOP;
+
+		/* add hop1 and hop2 */
+		total_hops = num_of_hop3 + 2;
+		hop1_addr = ctx->dram_default_hops[total_hops - 1];
+		hop2_addr = ctx->dram_default_hops[total_hops - 2];
+
+		for (i = 0 ; i < num_of_hop3 ; i++) {
+			hop3_pte_addr = ctx->dram_default_hops[i];
+			for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
+				clear_pte(hdev, hop3_pte_addr);
+				put_pte(ctx, ctx->dram_default_hops[i]);
+				hop3_pte_addr += HL_PTE_SIZE;
+			}
+		}
+
+		hop2_pte_addr = hop2_addr;
+		for (i = 0 ; i < num_of_hop3 ; i++) {
+			clear_pte(hdev, hop2_pte_addr);
+			put_pte(ctx, hop2_addr);
+			hop2_pte_addr += HL_PTE_SIZE;
+		}
+
+		clear_pte(hdev, hop1_addr);
+		put_pte(ctx, hop1_addr);
+		clear_pte(hdev, get_hop0_addr(ctx));
+
+		kfree(ctx->dram_default_hops);
+
+		/* flush all writes to reach PCI */
+		mb();
+		hdev->asic_funcs->read_pte(hdev, hop2_addr);
+	}
+
 	if (!hash_empty(ctx->mmu_hash))
-		dev_err(ctx->hdev->dev,
-				"ctx is freed while it has pgts in use\n");
+		dev_err(hdev->dev, "ctx is freed while it has pgts in use\n");
 
 	hash_for_each_safe(ctx->mmu_hash, i, tmp, pgt_info, node) {
-		dev_err(ctx->hdev->dev,
+		dev_err(hdev->dev,
 			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
 			pgt_info->addr, ctx->asid, pgt_info->num_of_ptes);
 		free_hop(ctx, pgt_info->addr);
@@ -287,6 +430,7 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
 static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
 {
 	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	u64 hop0_addr = 0, hop0_pte_addr = 0,
 		hop1_addr = 0, hop1_pte_addr = 0,
 		hop2_addr = 0, hop2_pte_addr = 0,
@@ -294,6 +438,11 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
 		hop4_addr = 0, hop4_pte_addr = 0,
 		curr_pte;
 	int clear_hop3 = 1;
+	bool is_dram_addr, is_huge, is_dram_default_page_mapping;
+
+	is_dram_addr = hl_mem_area_inside_range(virt_addr, PAGE_SIZE_2MB,
+				prop->va_space_dram_start_address,
+				prop->va_space_dram_end_address);
 
 	hop0_addr = get_hop0_addr(ctx);
@@ -328,7 +477,18 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
 
 	curr_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
 
-	if (!(curr_pte & LAST_MASK)) {
+	is_huge = curr_pte & LAST_MASK;
+
+	if (is_dram_addr && !is_huge) {
+		dev_err(hdev->dev,
+			"DRAM unmapping should use huge pages only\n");
+		return -EFAULT;
+	}
+
+	is_dram_default_page_mapping =
+			hdev->dram_default_page_mapping && is_dram_addr;
+
+	if (!is_huge) {
 		hop4_addr = get_next_hop_addr(curr_pte);
 
 		if (hop4_addr == ULLONG_MAX)
@@ -341,29 +501,51 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
 		clear_hop3 = 0;
 	}
 
-	if (!(curr_pte & PAGE_PRESENT_MASK))
-		goto not_mapped;
+	if (is_dram_default_page_mapping) {
+		u64 zero_pte = (prop->mmu_dram_default_page_addr &
+				PTE_PHYS_ADDR_MASK) | LAST_MASK |
+					PAGE_PRESENT_MASK;
+		if (curr_pte == zero_pte) {
+			dev_err(hdev->dev,
+				"DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
+					virt_addr);
+			goto not_mapped;
+		}
+
+		if (!(curr_pte & PAGE_PRESENT_MASK)) {
+			dev_err(hdev->dev,
+				"DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
+					virt_addr);
+			goto not_mapped;
+		}
 
-	clear_pte(hdev, hop4_addr ? hop4_pte_addr : hop3_pte_addr);
+		hdev->asic_funcs->write_pte(hdev, hop3_pte_addr, zero_pte);
+		put_pte(ctx, hop3_addr);
+	} else {
+		if (!(curr_pte & PAGE_PRESENT_MASK))
+			goto not_mapped;
+
+		clear_pte(hdev, hop4_addr ? hop4_pte_addr : hop3_pte_addr);
 
-	if (hop4_addr && !put_pte(ctx, hop4_addr))
-		clear_hop3 = 1;
+		if (hop4_addr && !put_pte(ctx, hop4_addr))
+			clear_hop3 = 1;
 
-	if (!clear_hop3)
-		goto flush;
-	clear_pte(hdev, hop3_pte_addr);
+		if (!clear_hop3)
+			goto flush;
+		clear_pte(hdev, hop3_pte_addr);
 
-	if (put_pte(ctx, hop3_addr))
-		goto flush;
-	clear_pte(hdev, hop2_pte_addr);
+		if (put_pte(ctx, hop3_addr))
+			goto flush;
+		clear_pte(hdev, hop2_pte_addr);
 
-	if (put_pte(ctx, hop2_addr))
-		goto flush;
-	clear_pte(hdev, hop1_pte_addr);
+		if (put_pte(ctx, hop2_addr))
+			goto flush;
+		clear_pte(hdev, hop1_pte_addr);
 
-	if (put_pte(ctx, hop1_addr))
-		goto flush;
-	clear_pte(hdev, hop0_pte_addr);
+		if (put_pte(ctx, hop1_addr))
+			goto flush;
+		clear_pte(hdev, hop0_pte_addr);
+	}
 
 flush:
 	/* flush all writes from all cores to reach PCI */
@@ -442,6 +624,7 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 		u32 page_size)
 {
 	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	u64 hop0_addr = 0, hop0_pte_addr = 0,
 		hop1_addr = 0, hop1_pte_addr = 0,
 		hop2_addr = 0, hop2_pte_addr = 0,
@@ -449,7 +632,8 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 		hop4_addr = 0, hop4_pte_addr = 0,
 		curr_pte = 0;
 	bool hop1_new = false, hop2_new = false, hop3_new = false,
-		hop4_new = false, is_huge;
+		hop4_new = false, is_huge, is_dram_addr,
+		is_dram_default_page_mapping;
 	int rc = -ENOMEM;
 
 	/*
@@ -461,6 +645,18 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 	 */
 	is_huge = page_size == PAGE_SIZE_2MB;
 
+	is_dram_addr = hl_mem_area_inside_range(virt_addr, page_size,
+				prop->va_space_dram_start_address,
+				prop->va_space_dram_end_address);
+
+	if (is_dram_addr && !is_huge) {
+		dev_err(hdev->dev, "DRAM mapping should use huge pages only\n");
+		return -EFAULT;
+	}
+
+	is_dram_default_page_mapping =
+			hdev->dram_default_page_mapping && is_dram_addr;
+
 	hop0_addr = get_hop0_addr(ctx);
 
 	hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
@@ -505,7 +701,26 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 		curr_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
 	}
 
-	if (curr_pte & PAGE_PRESENT_MASK) {
+	if (is_dram_default_page_mapping) {
+		u64 zero_pte = (prop->mmu_dram_default_page_addr &
+					PTE_PHYS_ADDR_MASK) | LAST_MASK |
+						PAGE_PRESENT_MASK;
+
+		if (curr_pte != zero_pte) {
+			dev_err(hdev->dev,
+				"DRAM: mapping already exists for virt_addr 0x%llx\n",
+					virt_addr);
+			rc = -EINVAL;
+			goto err;
+		}
+
+		if (hop1_new || hop2_new || hop3_new || hop4_new) {
+			dev_err(hdev->dev,
+				"DRAM mapping should not allocate more hops\n");
+			rc = -EFAULT;
+			goto err;
+		}
+	} else if (curr_pte & PAGE_PRESENT_MASK) {
 		dev_err(hdev->dev,
 			"mapping already exists for virt_addr 0x%llx\n",
 			virt_addr);
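The diff is truncated here, but the map/unmap symmetry is already visible: both _hl_mmu_map() and _hl_mmu_unmap() compare the hop3 PTE against the same sentinel value that points at the default DRAM page. A toy model of that PTE life cycle; the mask and address values below are placeholders, not the driver's real constants:

#include <assert.h>
#include <stdint.h>

#define PTE_PHYS_ADDR_MASK	0xFFFFFFFFFFFFF000ULL	/* placeholder */
#define LAST_MASK		0x800ULL		/* placeholder */
#define PAGE_PRESENT_MASK	0x1ULL			/* placeholder */

static const uint64_t default_page = 0x1de00000ULL;	/* placeholder */

static uint64_t zero_pte(void)
{
	return (default_page & PTE_PHYS_ADDR_MASK) | LAST_MASK |
			PAGE_PRESENT_MASK;
}

/* map: only legal while the PTE still holds the sentinel */
static int map_pte(uint64_t *pte, uint64_t phys)
{
	if (*pte != zero_pte())
		return -1;	/* "mapping already exists" */
	*pte = (phys & PTE_PHYS_ADDR_MASK) | LAST_MASK | PAGE_PRESENT_MASK;
	return 0;
}

/* unmap: restore the sentinel rather than clearing the PTE, so the VA
 * reads the default DRAM page again instead of faulting */
static int unmap_pte(uint64_t *pte)
{
	if (*pte == zero_pte())
		return -1;	/* "points to zero page, can't unmap" */
	*pte = zero_pte();
	return 0;
}

int main(void)
{
	uint64_t pte = zero_pte();

	assert(unmap_pte(&pte) == -1);			/* nothing mapped yet */
	assert(map_pte(&pte, 0x40000000ULL) == 0);
	assert(map_pte(&pte, 0x80000000ULL) == -1);	/* double map */
	assert(unmap_pte(&pte) == 0);
	assert(pte == zero_pte());	/* default mapping restored */
	return 0;
}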