Diffstat (limited to 'arch/powerpc/platforms/powernv')
-rw-r--r--  arch/powerpc/platforms/powernv/Kconfig               |   5
-rw-r--r--  arch/powerpc/platforms/powernv/memtrace.c            |  21
-rw-r--r--  arch/powerpc/platforms/powernv/npu-dma.c             | 198
-rw-r--r--  arch/powerpc/platforms/powernv/opal-powercap.c       |   3
-rw-r--r--  arch/powerpc/platforms/powernv/opal-sensor-groups.c  |   4
-rw-r--r--  arch/powerpc/platforms/powernv/opal-sysparam.c       |   2
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c                |   2
-rw-r--r--  arch/powerpc/platforms/powernv/setup.c               |  47
8 files changed, 157 insertions(+), 125 deletions(-)
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index f8dc98d3dc01..028ac941c05c 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -15,11 +15,6 @@ config PPC_POWERNV
         select PPC_SCOM
         select ARCH_RANDOM
         select CPU_FREQ
-        select CPU_FREQ_GOV_PERFORMANCE
-        select CPU_FREQ_GOV_POWERSAVE
-        select CPU_FREQ_GOV_USERSPACE
-        select CPU_FREQ_GOV_ONDEMAND
-        select CPU_FREQ_GOV_CONSERVATIVE
         select PPC_DOORBELL
         select MMU_NOTIFIER
         select FORCE_SMP
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 51dc398ae3f7..a29fdf8a2e56 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -90,17 +90,15 @@ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
         walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE,
                           change_memblock_state);
 
-        lock_device_hotplug();
-        remove_memory(nid, start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
-        unlock_device_hotplug();
 
         return true;
 }
 
 static u64 memtrace_alloc_node(u32 nid, u64 size)
 {
-        u64 start_pfn, end_pfn, nr_pages;
+        u64 start_pfn, end_pfn, nr_pages, pfn;
         u64 base_pfn;
+        u64 bytes = memory_block_size_bytes();
 
         if (!node_spanned_pages(nid))
                 return 0;
@@ -113,8 +111,21 @@ static u64 memtrace_alloc_node(u32 nid, u64 size)
         end_pfn = round_down(end_pfn - nr_pages, nr_pages);
 
         for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
-                if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true)
+                if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) {
+                        /*
+                         * Remove memory in memory block size chunks so that
+                         * iomem resources are always split to the same size and
+                         * we never try to remove memory that spans two iomem
+                         * resources.
+                         */
+                        lock_device_hotplug();
+                        end_pfn = base_pfn + nr_pages;
+                        for (pfn = base_pfn; pfn < end_pfn; pfn += bytes >> PAGE_SHIFT) {
+                                remove_memory(nid, pfn << PAGE_SHIFT, bytes);
+                        }
+                        unlock_device_hotplug();
                         return base_pfn << PAGE_SHIFT;
+                }
         }
 
         return 0;
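The memtrace hunk above stops removing the whole offlined range in one remove_memory() call and instead walks it in memory_block_size_bytes() steps, so iomem resources are always split at the same granularity and a removal never spans two resources. A standalone userspace sketch of that chunking arithmetic; all values (block size, PFNs, buffer size) are invented for illustration, and remove_memory() is a print stub:

/*
 * Sketch, not kernel code: walk an offlined PFN range in memory-block-sized
 * chunks, one remove_memory() call per block.
 */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 16			/* 64K pages, as on powerpc64 */

static void remove_memory_stub(uint64_t addr, uint64_t bytes)
{
	printf("remove_memory(0x%llx, 0x%llx)\n",
	       (unsigned long long)addr, (unsigned long long)bytes);
}

int main(void)
{
	uint64_t bytes = 256ULL << 20;	/* assumed 256M memory block size */
	uint64_t base_pfn = 0x40000;	/* example offlined range start */
	uint64_t nr_pages = (1ULL << 30) >> PAGE_SHIFT;	/* 1G trace buffer */
	uint64_t end_pfn = base_pfn + nr_pages;

	/* one call per memory block, never spanning two iomem resources */
	for (uint64_t pfn = base_pfn; pfn < end_pfn; pfn += bytes >> PAGE_SHIFT)
		remove_memory_stub(pfn << PAGE_SHIFT, bytes);

	return 0;
}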
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 8006c54a91e3..6f60e0931922 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -17,7 +17,7 @@
 #include <linux/pci.h>
 #include <linux/memblock.h>
 #include <linux/iommu.h>
-#include <linux/debugfs.h>
+#include <linux/sizes.h>
 
 #include <asm/debugfs.h>
 #include <asm/tlb.h>
@@ -42,14 +42,6 @@ static DEFINE_SPINLOCK(npu_context_lock);
 
 /*
- * When an address shootdown range exceeds this threshold we invalidate the
- * entire TLB on the GPU for the given PID rather than each specific address in
- * the range.
- */
-static uint64_t atsd_threshold = 2 * 1024 * 1024;
-static struct dentry *atsd_threshold_dentry;
-
-/*
  * Other types of TCE cache invalidation are not functional in the
  * hardware.
  */
@@ -454,79 +446,73 @@ static void put_mmio_atsd_reg(struct npu *npu, int reg)
 }
 
 /* MMIO ATSD register offsets */
-#define XTS_ATSD_AVA  1
-#define XTS_ATSD_STAT 2
-
-static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg,
-                                unsigned long launch, unsigned long va)
-{
-        struct npu *npu = mmio_atsd_reg->npu;
-        int reg = mmio_atsd_reg->reg;
-
-        __raw_writeq_be(va, npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA);
-        eieio();
-        __raw_writeq_be(launch, npu->mmio_atsd_regs[reg]);
-}
+#define XTS_ATSD_LAUNCH 0
+#define XTS_ATSD_AVA    1
+#define XTS_ATSD_STAT   2
 
-static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
-                                unsigned long pid, bool flush)
+static unsigned long get_atsd_launch_val(unsigned long pid, unsigned long psize)
 {
-        int i;
-        unsigned long launch;
-
-        for (i = 0; i <= max_npu2_index; i++) {
-                if (mmio_atsd_reg[i].reg < 0)
-                        continue;
+        unsigned long launch = 0;
 
-                /* IS set to invalidate matching PID */
-                launch = PPC_BIT(12);
-
-                /* PRS set to process-scoped */
-                launch |= PPC_BIT(13);
+        if (psize == MMU_PAGE_COUNT) {
+                /* IS set to invalidate entire matching PID */
+                launch |= PPC_BIT(12);
+        } else {
+                /* AP set to invalidate region of psize */
+                launch |= (u64)mmu_get_ap(psize) << PPC_BITLSHIFT(17);
+        }
 
-                /* AP */
-                launch |= (u64)
-                        mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+        /* PRS set to process-scoped */
+        launch |= PPC_BIT(13);
 
-                /* PID */
-                launch |= pid << PPC_BITLSHIFT(38);
+        /* PID */
+        launch |= pid << PPC_BITLSHIFT(38);
 
-                /* No flush */
-                launch |= !flush << PPC_BITLSHIFT(39);
+        /* Leave "No flush" (bit 39) 0 so every ATSD performs a flush */
 
-                /* Invalidating the entire process doesn't use a va */
-                mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0);
-        }
+        return launch;
 }
 
-static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
-                        unsigned long va, unsigned long pid, bool flush)
+static void mmio_atsd_regs_write(struct mmio_atsd_reg
+                        mmio_atsd_reg[NV_MAX_NPUS], unsigned long offset,
+                        unsigned long val)
 {
-        int i;
-        unsigned long launch;
+        struct npu *npu;
+        int i, reg;
 
         for (i = 0; i <= max_npu2_index; i++) {
-                if (mmio_atsd_reg[i].reg < 0)
+                reg = mmio_atsd_reg[i].reg;
+                if (reg < 0)
                         continue;
 
-                /* IS set to invalidate target VA */
-                launch = 0;
+                npu = mmio_atsd_reg[i].npu;
+                __raw_writeq_be(val, npu->mmio_atsd_regs[reg] + offset);
+        }
+}
+
+static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
+                                unsigned long pid)
+{
+        unsigned long launch = get_atsd_launch_val(pid, MMU_PAGE_COUNT);
 
-                /* PRS set to process scoped */
-                launch |= PPC_BIT(13);
+        /* Invalidating the entire process doesn't use a va */
+        mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch);
+}
 
-                /* AP */
-                launch |= (u64)
-                        mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+static void mmio_invalidate_range(struct mmio_atsd_reg
+                        mmio_atsd_reg[NV_MAX_NPUS], unsigned long pid,
+                        unsigned long start, unsigned long psize)
+{
+        unsigned long launch = get_atsd_launch_val(pid, psize);
 
-                /* PID */
-                launch |= pid << PPC_BITLSHIFT(38);
+        /* Write all VAs first */
+        mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_AVA, start);
 
-                /* No flush */
-                launch |= !flush << PPC_BITLSHIFT(39);
+        /* Issue one barrier for all address writes */
+        eieio();
 
-                mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va);
-        }
+        /* Launch */
+        mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch);
 }
 
 #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
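get_atsd_launch_val() above packs the ATSD launch word once, and the new mmio_atsd_regs_write() helper broadcasts each write to every NPU's register, so a single eieio() covers all address writes instead of one barrier per register. A userspace sketch of the bit packing, using the same big-endian bit numbering as the kernel's PPC_BIT()/PPC_BITLSHIFT() macros; the AP encoding value passed in main() is a placeholder, not a real mmu_get_ap() result:

/*
 * Sketch, not kernel code: build an ATSD launch value. Bit 0 is the MSB of
 * the 64-bit word, as in the kernel's PPC_BIT() convention.
 */
#include <stdio.h>
#include <stdint.h>

#define PPC_BITLSHIFT(be)	(63 - (be))
#define PPC_BIT(bit)		(1ULL << PPC_BITLSHIFT(bit))

static uint64_t atsd_launch_val(uint64_t pid, int whole_pid, uint64_t ap)
{
	uint64_t launch = 0;

	if (whole_pid)
		launch |= PPC_BIT(12);			/* IS: whole PID */
	else
		launch |= ap << PPC_BITLSHIFT(17);	/* AP: region page size */

	launch |= PPC_BIT(13);				/* PRS: process-scoped */
	launch |= pid << PPC_BITLSHIFT(38);		/* PID field */
	/* bit 39 ("no flush") left clear: every ATSD performs a flush */

	return launch;
}

int main(void)
{
	printf("PID-wide: 0x%016llx\n",
	       (unsigned long long)atsd_launch_val(42, 1, 0));
	printf("range   : 0x%016llx\n",
	       (unsigned long long)atsd_launch_val(42, 0, 5 /* placeholder AP */));
	return 0;
}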
@@ -612,14 +598,36 @@ static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
 }
 
 /*
- * Invalidate either a single address or an entire PID depending on
- * the value of va.
+ * Invalidate a virtual address range
  */
-static void mmio_invalidate(struct npu_context *npu_context, int va,
-                        unsigned long address, bool flush)
+static void mmio_invalidate(struct npu_context *npu_context,
+                        unsigned long start, unsigned long size)
 {
         struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
         unsigned long pid = npu_context->mm->context.id;
+        unsigned long atsd_start = 0;
+        unsigned long end = start + size - 1;
+        int atsd_psize = MMU_PAGE_COUNT;
+
+        /*
+         * Convert the input range into one of the supported sizes. If the range
+         * doesn't fit, use the next larger supported size. Invalidation latency
+         * is high, so over-invalidation is preferred to issuing multiple
+         * invalidates.
+         *
+         * A 4K page size isn't supported by NPU/GPU ATS, so that case is
+         * ignored.
+         */
+        if (size == SZ_64K) {
+                atsd_start = start;
+                atsd_psize = MMU_PAGE_64K;
+        } else if (ALIGN_DOWN(start, SZ_2M) == ALIGN_DOWN(end, SZ_2M)) {
+                atsd_start = ALIGN_DOWN(start, SZ_2M);
+                atsd_psize = MMU_PAGE_2M;
+        } else if (ALIGN_DOWN(start, SZ_1G) == ALIGN_DOWN(end, SZ_1G)) {
+                atsd_start = ALIGN_DOWN(start, SZ_1G);
+                atsd_psize = MMU_PAGE_1G;
+        }
 
         if (npu_context->nmmu_flush)
                 /*
@@ -634,23 +642,25 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
          * an invalidate.
          */
         acquire_atsd_reg(npu_context, mmio_atsd_reg);
-        if (va)
-                mmio_invalidate_va(mmio_atsd_reg, address, pid, flush);
+
+        if (atsd_psize == MMU_PAGE_COUNT)
+                mmio_invalidate_pid(mmio_atsd_reg, pid);
         else
-                mmio_invalidate_pid(mmio_atsd_reg, pid, flush);
+                mmio_invalidate_range(mmio_atsd_reg, pid, atsd_start,
+                                        atsd_psize);
 
         mmio_invalidate_wait(mmio_atsd_reg);
-        if (flush) {
-                /*
-                 * The GPU requires two flush ATSDs to ensure all entries have
-                 * been flushed. We use PID 0 as it will never be used for a
-                 * process on the GPU.
-                 */
-                mmio_invalidate_pid(mmio_atsd_reg, 0, true);
-                mmio_invalidate_wait(mmio_atsd_reg);
-                mmio_invalidate_pid(mmio_atsd_reg, 0, true);
-                mmio_invalidate_wait(mmio_atsd_reg);
-        }
+
+        /*
+         * The GPU requires two flush ATSDs to ensure all entries have been
+         * flushed. We use PID 0 as it will never be used for a process on the
+         * GPU.
+         */
+        mmio_invalidate_pid(mmio_atsd_reg, 0);
+        mmio_invalidate_wait(mmio_atsd_reg);
+        mmio_invalidate_pid(mmio_atsd_reg, 0);
+        mmio_invalidate_wait(mmio_atsd_reg);
+
         release_atsd_reg(mmio_atsd_reg);
 }
 
@@ -667,7 +677,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
          * There should be no more translation requests for this PID, but we
          * need to ensure any entries for it are removed from the TLB.
          */
-        mmio_invalidate(npu_context, 0, 0, true);
+        mmio_invalidate(npu_context, 0, ~0UL);
 }
 
 static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -676,8 +686,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
                 pte_t pte)
 {
         struct npu_context *npu_context = mn_to_npu_context(mn);
-
-        mmio_invalidate(npu_context, 1, address, true);
+        mmio_invalidate(npu_context, address, PAGE_SIZE);
 }
 
 static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -685,21 +694,7 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
                 unsigned long start, unsigned long end)
 {
         struct npu_context *npu_context = mn_to_npu_context(mn);
-        unsigned long address;
-
-        if (end - start > atsd_threshold) {
-                /*
-                 * Just invalidate the entire PID if the address range is too
-                 * large.
-                 */
-                mmio_invalidate(npu_context, 0, 0, true);
-        } else {
-                for (address = start; address < end; address += PAGE_SIZE)
-                        mmio_invalidate(npu_context, 1, address, false);
-
-                /* Do the flush only on the final addess == end */
-                mmio_invalidate(npu_context, 1, address, true);
-        }
+        mmio_invalidate(npu_context, start, end - start);
 }
 
 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -962,11 +957,6 @@ int pnv_npu2_init(struct pnv_phb *phb)
         static int npu_index;
         uint64_t rc = 0;
 
-        if (!atsd_threshold_dentry) {
-                atsd_threshold_dentry = debugfs_create_x64("atsd_threshold",
-                                0600, powerpc_debugfs_root, &atsd_threshold);
-        }
-
         phb->npu.nmmu_flush =
                 of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush");
         for_each_child_of_node(phb->hose->dn, dn) {
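The reworked mmio_invalidate() above maps an arbitrary start/size range onto the smallest supported ATSD page size (64K, 2M or 1G) whose naturally aligned region contains the whole range, and falls back to a whole-PID invalidate otherwise, trading over-invalidation for fewer high-latency ATSD operations. A self-contained illustration of that selection logic; the constants mirror the kernel's SZ_* values, ALIGN_DOWN assumes a power-of-two alignment, and the example ranges are invented:

/*
 * Sketch, not kernel code: pick the ATSD invalidation granule for a range.
 */
#include <stdio.h>
#include <stdint.h>

#define SZ_64K (64ULL << 10)
#define SZ_2M  (2ULL << 20)
#define SZ_1G  (1ULL << 30)
#define ALIGN_DOWN(x, a) ((x) & ~((uint64_t)(a) - 1))

static const char *pick_psize(uint64_t start, uint64_t size)
{
	uint64_t end = start + size - 1;

	if (size == SZ_64K)
		return "64K";
	if (ALIGN_DOWN(start, SZ_2M) == ALIGN_DOWN(end, SZ_2M))
		return "2M";		/* over-invalidate one 2M region */
	if (ALIGN_DOWN(start, SZ_1G) == ALIGN_DOWN(end, SZ_1G))
		return "1G";		/* over-invalidate one 1G region */
	return "whole PID";		/* range spans 1G regions */
}

int main(void)
{
	printf("%s\n", pick_psize(0x10000, SZ_64K));		 /* 64K */
	printf("%s\n", pick_psize(0x200000, 3 * SZ_64K));	 /* 2M */
	printf("%s\n", pick_psize(SZ_2M - SZ_64K, 2 * SZ_64K));	 /* 1G */
	printf("%s\n", pick_psize(SZ_1G - SZ_64K, 2 * SZ_64K));	 /* whole PID */
	return 0;
}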
diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c
index badb29bde93f..d90ee4fc2c6a 100644
--- a/arch/powerpc/platforms/powernv/opal-powercap.c
+++ b/arch/powerpc/platforms/powernv/opal-powercap.c
@@ -199,7 +199,7 @@ void __init opal_powercap_init(void)
                 }
 
                 j = 0;
-                pcaps[i].pg.name = node->name;
+                pcaps[i].pg.name = kasprintf(GFP_KERNEL, "%pOFn", node);
                 if (has_min) {
                         powercap_add_attr(min, "powercap-min",
                                           &pcaps[i].pattrs[j]);
@@ -237,6 +237,7 @@ out_pcaps_pattrs:
         while (--i >= 0) {
                 kfree(pcaps[i].pattrs);
                 kfree(pcaps[i].pg.attrs);
+                kfree(pcaps[i].pg.name);
         }
         kobject_put(powercap_kobj);
 out_pcaps:
diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
index f7d04b6a2d7a..179609220e6f 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
@@ -214,9 +214,9 @@ void __init opal_sensor_groups_init(void)
                 }
 
                 if (!of_property_read_u32(node, "ibm,chip-id", &chipid))
-                        sprintf(sgs[i].name, "%s%d", node->name, chipid);
+                        sprintf(sgs[i].name, "%pOFn%d", node, chipid);
                 else
-                        sprintf(sgs[i].name, "%s", node->name);
+                        sprintf(sgs[i].name, "%pOFn", node);
 
                 sgs[i].sg.name = sgs[i].name;
                 if (add_attr_group(ops, len, &sgs[i], sgid)) {
diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c
index 9aa87df114fd..916a4b7b1bb5 100644
--- a/arch/powerpc/platforms/powernv/opal-sysparam.c
+++ b/arch/powerpc/platforms/powernv/opal-sysparam.c
@@ -194,7 +194,7 @@ void __init opal_sys_param_init(void)
         count = of_property_count_strings(sysparam, "param-name");
         if (count < 0) {
                 pr_err("SYSPARAM: No string found of property param-name in "
-                                "the node %s\n", sysparam->name);
+                                "the node %pOFn\n", sysparam);
                 goto out_param_buf;
         }
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 38fe4087484a..a4641515956f 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -535,7 +535,7 @@ static int opal_recover_mce(struct pt_regs *regs,
         return recovered;
 }
 
-void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
+void __noreturn pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
 {
         panic_flush_kmsg_start();
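The three OPAL hunks above replace direct uses of the device node's name pointer with the %pOFn printk specifier, which (at least as of this series) prints the node name with any @unit-address suffix stripped; opal-powercap additionally takes a kasprintf() copy of the name, which is why its error path gains a kfree(). A userspace approximation of what %pOFn emits for a full device-tree path; the paths are made up and the helper name is invented for illustration:

/*
 * Sketch, not kernel code: emulate the %pOFn output for a device-tree path
 * by taking the last path component and dropping the "@unit-address" part.
 */
#include <stdio.h>
#include <string.h>

static void print_of_node_name(const char *full_path)
{
	const char *name = strrchr(full_path, '/');
	const char *at;
	size_t len;

	name = name ? name + 1 : full_path;
	at = strchr(name, '@');
	len = at ? (size_t)(at - name) : strlen(name);

	printf("%.*s\n", (int)len, name);
}

int main(void)
{
	print_of_node_name("/ibm,opal/power-mgt/powercap@1");	/* "powercap" */
	print_of_node_name("/ibm,opal/sensor-groups/occ-csm@1c00020"); /* "occ-csm" */
	return 0;
}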
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index adddde023622..14befee4b3f1 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -219,17 +219,41 @@ static void pnv_prepare_going_down(void)
 
 static void __noreturn pnv_restart(char *cmd)
 {
-        long rc = OPAL_BUSY;
+        long rc;
 
         pnv_prepare_going_down();
 
-        while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
-                rc = opal_cec_reboot();
-                if (rc == OPAL_BUSY_EVENT)
-                        opal_poll_events(NULL);
+        do {
+                if (!cmd)
+                        rc = opal_cec_reboot();
+                else if (strcmp(cmd, "full") == 0)
+                        rc = opal_cec_reboot2(OPAL_REBOOT_FULL_IPL, NULL);
                 else
+                        rc = OPAL_UNSUPPORTED;
+
+                if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
+                        /* OPAL is busy; wait for some time and retry */
+                        opal_poll_events(NULL);
                         mdelay(10);
-        }
+
+                } else if (cmd && rc) {
+                        /* The named reboot failed; fall back to a cec-reboot */
+                        if (rc == OPAL_UNSUPPORTED)
+                                pr_err("Unsupported '%s' reboot.\n", cmd);
+                        else
+                                pr_err("Unable to issue '%s' reboot. Err=%ld\n",
+                                       cmd, rc);
+                        pr_info("Forcing a cec-reboot\n");
+                        cmd = NULL;
+                        rc = OPAL_BUSY;
+
+                } else if (rc != OPAL_SUCCESS) {
+                        /* Unknown error while issuing cec-reboot */
+                        pr_err("Unable to reboot. Err=%ld\n", rc);
+                }
+
+        } while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT);
+
         for (;;)
                 opal_poll_events(NULL);
 }
@@ -437,6 +461,16 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu)
         return ret_freq;
 }
 
+static long pnv_machine_check_early(struct pt_regs *regs)
+{
+        long handled = 0;
+
+        if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
+                handled = cur_cpu_spec->machine_check_early(regs);
+
+        return handled;
+}
+
 define_machine(powernv) {
         .name                   = "PowerNV",
         .probe                  = pnv_probe,
@@ -448,6 +482,7 @@ define_machine(powernv) {
         .machine_shutdown       = pnv_shutdown,
         .power_save             = NULL,
         .calibrate_decr         = generic_calibrate_decr,
+        .machine_check_early    = pnv_machine_check_early,
 #ifdef CONFIG_KEXEC_CORE
         .kexec_cpu_down         = pnv_kexec_cpu_down,
 #endif
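The new pnv_restart() above retries while firmware reports busy and demotes an unsupported or failing named reboot (e.g. "full") to a plain opal_cec_reboot(). A userspace sketch of that retry/fallback control flow; the opal_* stubs and their return values are invented stand-ins, and only the OPAL_* error codes mirror the kernel's opal-api.h values:

/*
 * Sketch, not kernel code: retry on busy, fall back to a plain cec-reboot
 * when the named reboot type is rejected.
 */
#include <stdio.h>
#include <string.h>

#define OPAL_SUCCESS		0
#define OPAL_BUSY		-2
#define OPAL_UNSUPPORTED	-7
#define OPAL_BUSY_EVENT		-12

static int opal_cec_reboot(void) { return OPAL_SUCCESS; }	/* stub */
static int opal_cec_reboot2(const char *type)			/* stub */
{
	(void)type;
	return OPAL_UNSUPPORTED;
}

static void pnv_restart_sketch(const char *cmd)
{
	long rc;

	do {
		if (!cmd)
			rc = opal_cec_reboot();
		else if (strcmp(cmd, "full") == 0)
			rc = opal_cec_reboot2("full");
		else
			rc = OPAL_UNSUPPORTED;

		if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
			/* firmware busy: poll and retry */
		} else if (cmd && rc != OPAL_SUCCESS) {
			/* named reboot failed: force a plain cec-reboot */
			printf("'%s' reboot failed (%ld), forcing cec-reboot\n",
			       cmd, rc);
			cmd = NULL;
			rc = OPAL_BUSY;		/* loop again */
		} else if (rc != OPAL_SUCCESS) {
			printf("unable to reboot, err=%ld\n", rc);
		}
	} while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT);
}

int main(void)
{
	pnv_restart_sketch("full");	/* falls back to opal_cec_reboot() */
	return 0;
}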