summaryrefslogtreecommitdiff
path: root/arch/powerpc/platforms/powernv
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/powernv')
-rw-r--r--arch/powerpc/platforms/powernv/Kconfig5
-rw-r--r--arch/powerpc/platforms/powernv/memtrace.c21
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c198
-rw-r--r--arch/powerpc/platforms/powernv/opal-powercap.c3
-rw-r--r--arch/powerpc/platforms/powernv/opal-sensor-groups.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-sysparam.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal.c2
-rw-r--r--arch/powerpc/platforms/powernv/setup.c47
8 files changed, 157 insertions, 125 deletions
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index f8dc98d3dc01..028ac941c05c 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -15,11 +15,6 @@ config PPC_POWERNV
select PPC_SCOM
select ARCH_RANDOM
select CPU_FREQ
- select CPU_FREQ_GOV_PERFORMANCE
- select CPU_FREQ_GOV_POWERSAVE
- select CPU_FREQ_GOV_USERSPACE
- select CPU_FREQ_GOV_ONDEMAND
- select CPU_FREQ_GOV_CONSERVATIVE
select PPC_DOORBELL
select MMU_NOTIFIER
select FORCE_SMP
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 51dc398ae3f7..a29fdf8a2e56 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -90,17 +90,15 @@ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE,
change_memblock_state);
- lock_device_hotplug();
- remove_memory(nid, start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
- unlock_device_hotplug();
return true;
}
static u64 memtrace_alloc_node(u32 nid, u64 size)
{
- u64 start_pfn, end_pfn, nr_pages;
+ u64 start_pfn, end_pfn, nr_pages, pfn;
u64 base_pfn;
+ u64 bytes = memory_block_size_bytes();
if (!node_spanned_pages(nid))
return 0;
@@ -113,8 +111,21 @@ static u64 memtrace_alloc_node(u32 nid, u64 size)
end_pfn = round_down(end_pfn - nr_pages, nr_pages);
for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
- if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true)
+ if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) {
+ /*
+ * Remove memory in memory block size chunks so that
+ * iomem resources are always split to the same size and
+ * we never try to remove memory that spans two iomem
+ * resources.
+ */
+ lock_device_hotplug();
+ end_pfn = base_pfn + nr_pages;
+ for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) {
+ remove_memory(nid, pfn << PAGE_SHIFT, bytes);
+ }
+ unlock_device_hotplug();
return base_pfn << PAGE_SHIFT;
+ }
}
return 0;
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 8006c54a91e3..6f60e0931922 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -17,7 +17,7 @@
#include <linux/pci.h>
#include <linux/memblock.h>
#include <linux/iommu.h>
-#include <linux/debugfs.h>
+#include <linux/sizes.h>
#include <asm/debugfs.h>
#include <asm/tlb.h>
@@ -42,14 +42,6 @@
static DEFINE_SPINLOCK(npu_context_lock);
/*
- * When an address shootdown range exceeds this threshold we invalidate the
- * entire TLB on the GPU for the given PID rather than each specific address in
- * the range.
- */
-static uint64_t atsd_threshold = 2 * 1024 * 1024;
-static struct dentry *atsd_threshold_dentry;
-
-/*
* Other types of TCE cache invalidation are not functional in the
* hardware.
*/
@@ -454,79 +446,73 @@ static void put_mmio_atsd_reg(struct npu *npu, int reg)
}
/* MMIO ATSD register offsets */
-#define XTS_ATSD_AVA 1
-#define XTS_ATSD_STAT 2
-
-static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg,
- unsigned long launch, unsigned long va)
-{
- struct npu *npu = mmio_atsd_reg->npu;
- int reg = mmio_atsd_reg->reg;
-
- __raw_writeq_be(va, npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA);
- eieio();
- __raw_writeq_be(launch, npu->mmio_atsd_regs[reg]);
-}
+#define XTS_ATSD_LAUNCH 0
+#define XTS_ATSD_AVA 1
+#define XTS_ATSD_STAT 2
-static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
- unsigned long pid, bool flush)
+static unsigned long get_atsd_launch_val(unsigned long pid, unsigned long psize)
{
- int i;
- unsigned long launch;
-
- for (i = 0; i <= max_npu2_index; i++) {
- if (mmio_atsd_reg[i].reg < 0)
- continue;
+ unsigned long launch = 0;
- /* IS set to invalidate matching PID */
- launch = PPC_BIT(12);
-
- /* PRS set to process-scoped */
- launch |= PPC_BIT(13);
+ if (psize == MMU_PAGE_COUNT) {
+ /* IS set to invalidate entire matching PID */
+ launch |= PPC_BIT(12);
+ } else {
+ /* AP set to invalidate region of psize */
+ launch |= (u64)mmu_get_ap(psize) << PPC_BITLSHIFT(17);
+ }
- /* AP */
- launch |= (u64)
- mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+ /* PRS set to process-scoped */
+ launch |= PPC_BIT(13);
- /* PID */
- launch |= pid << PPC_BITLSHIFT(38);
+ /* PID */
+ launch |= pid << PPC_BITLSHIFT(38);
- /* No flush */
- launch |= !flush << PPC_BITLSHIFT(39);
+ /* Leave "No flush" (bit 39) 0 so every ATSD performs a flush */
- /* Invalidating the entire process doesn't use a va */
- mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0);
- }
+ return launch;
}
-static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
- unsigned long va, unsigned long pid, bool flush)
+static void mmio_atsd_regs_write(struct mmio_atsd_reg
+ mmio_atsd_reg[NV_MAX_NPUS], unsigned long offset,
+ unsigned long val)
{
- int i;
- unsigned long launch;
+ struct npu *npu;
+ int i, reg;
for (i = 0; i <= max_npu2_index; i++) {
- if (mmio_atsd_reg[i].reg < 0)
+ reg = mmio_atsd_reg[i].reg;
+ if (reg < 0)
continue;
- /* IS set to invalidate target VA */
- launch = 0;
+ npu = mmio_atsd_reg[i].npu;
+ __raw_writeq_be(val, npu->mmio_atsd_regs[reg] + offset);
+ }
+}
+
+static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
+ unsigned long pid)
+{
+ unsigned long launch = get_atsd_launch_val(pid, MMU_PAGE_COUNT);
- /* PRS set to process scoped */
- launch |= PPC_BIT(13);
+ /* Invalidating the entire process doesn't use a va */
+ mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch);
+}
- /* AP */
- launch |= (u64)
- mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+static void mmio_invalidate_range(struct mmio_atsd_reg
+ mmio_atsd_reg[NV_MAX_NPUS], unsigned long pid,
+ unsigned long start, unsigned long psize)
+{
+ unsigned long launch = get_atsd_launch_val(pid, psize);
- /* PID */
- launch |= pid << PPC_BITLSHIFT(38);
+ /* Write all VAs first */
+ mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_AVA, start);
- /* No flush */
- launch |= !flush << PPC_BITLSHIFT(39);
+ /* Issue one barrier for all address writes */
+ eieio();
- mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va);
- }
+ /* Launch */
+ mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch);
}
#define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
@@ -612,14 +598,36 @@ static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
}
/*
- * Invalidate either a single address or an entire PID depending on
- * the value of va.
+ * Invalidate a virtual address range
*/
-static void mmio_invalidate(struct npu_context *npu_context, int va,
- unsigned long address, bool flush)
+static void mmio_invalidate(struct npu_context *npu_context,
+ unsigned long start, unsigned long size)
{
struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
unsigned long pid = npu_context->mm->context.id;
+ unsigned long atsd_start = 0;
+ unsigned long end = start + size - 1;
+ int atsd_psize = MMU_PAGE_COUNT;
+
+ /*
+ * Convert the input range into one of the supported sizes. If the range
+ * doesn't fit, use the next larger supported size. Invalidation latency
+ * is high, so over-invalidation is preferred to issuing multiple
+ * invalidates.
+ *
+ * A 4K page size isn't supported by NPU/GPU ATS, so that case is
+ * ignored.
+ */
+ if (size == SZ_64K) {
+ atsd_start = start;
+ atsd_psize = MMU_PAGE_64K;
+ } else if (ALIGN_DOWN(start, SZ_2M) == ALIGN_DOWN(end, SZ_2M)) {
+ atsd_start = ALIGN_DOWN(start, SZ_2M);
+ atsd_psize = MMU_PAGE_2M;
+ } else if (ALIGN_DOWN(start, SZ_1G) == ALIGN_DOWN(end, SZ_1G)) {
+ atsd_start = ALIGN_DOWN(start, SZ_1G);
+ atsd_psize = MMU_PAGE_1G;
+ }
if (npu_context->nmmu_flush)
/*
@@ -634,23 +642,25 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
* an invalidate.
*/
acquire_atsd_reg(npu_context, mmio_atsd_reg);
- if (va)
- mmio_invalidate_va(mmio_atsd_reg, address, pid, flush);
+
+ if (atsd_psize == MMU_PAGE_COUNT)
+ mmio_invalidate_pid(mmio_atsd_reg, pid);
else
- mmio_invalidate_pid(mmio_atsd_reg, pid, flush);
+ mmio_invalidate_range(mmio_atsd_reg, pid, atsd_start,
+ atsd_psize);
mmio_invalidate_wait(mmio_atsd_reg);
- if (flush) {
- /*
- * The GPU requires two flush ATSDs to ensure all entries have
- * been flushed. We use PID 0 as it will never be used for a
- * process on the GPU.
- */
- mmio_invalidate_pid(mmio_atsd_reg, 0, true);
- mmio_invalidate_wait(mmio_atsd_reg);
- mmio_invalidate_pid(mmio_atsd_reg, 0, true);
- mmio_invalidate_wait(mmio_atsd_reg);
- }
+
+ /*
+ * The GPU requires two flush ATSDs to ensure all entries have been
+ * flushed. We use PID 0 as it will never be used for a process on the
+ * GPU.
+ */
+ mmio_invalidate_pid(mmio_atsd_reg, 0);
+ mmio_invalidate_wait(mmio_atsd_reg);
+ mmio_invalidate_pid(mmio_atsd_reg, 0);
+ mmio_invalidate_wait(mmio_atsd_reg);
+
release_atsd_reg(mmio_atsd_reg);
}
@@ -667,7 +677,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
* There should be no more translation requests for this PID, but we
* need to ensure any entries for it are removed from the TLB.
*/
- mmio_invalidate(npu_context, 0, 0, true);
+ mmio_invalidate(npu_context, 0, ~0UL);
}
static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -676,8 +686,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
pte_t pte)
{
struct npu_context *npu_context = mn_to_npu_context(mn);
-
- mmio_invalidate(npu_context, 1, address, true);
+ mmio_invalidate(npu_context, address, PAGE_SIZE);
}
static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -685,21 +694,7 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
unsigned long start, unsigned long end)
{
struct npu_context *npu_context = mn_to_npu_context(mn);
- unsigned long address;
-
- if (end - start > atsd_threshold) {
- /*
- * Just invalidate the entire PID if the address range is too
- * large.
- */
- mmio_invalidate(npu_context, 0, 0, true);
- } else {
- for (address = start; address < end; address += PAGE_SIZE)
- mmio_invalidate(npu_context, 1, address, false);
-
- /* Do the flush only on the final addess == end */
- mmio_invalidate(npu_context, 1, address, true);
- }
+ mmio_invalidate(npu_context, start, end - start);
}
static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -962,11 +957,6 @@ int pnv_npu2_init(struct pnv_phb *phb)
static int npu_index;
uint64_t rc = 0;
- if (!atsd_threshold_dentry) {
- atsd_threshold_dentry = debugfs_create_x64("atsd_threshold",
- 0600, powerpc_debugfs_root, &atsd_threshold);
- }
-
phb->npu.nmmu_flush =
of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush");
for_each_child_of_node(phb->hose->dn, dn) {
diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c
index badb29bde93f..d90ee4fc2c6a 100644
--- a/arch/powerpc/platforms/powernv/opal-powercap.c
+++ b/arch/powerpc/platforms/powernv/opal-powercap.c
@@ -199,7 +199,7 @@ void __init opal_powercap_init(void)
}
j = 0;
- pcaps[i].pg.name = node->name;
+ pcaps[i].pg.name = kasprintf(GFP_KERNEL, "%pOFn", node);
if (has_min) {
powercap_add_attr(min, "powercap-min",
&pcaps[i].pattrs[j]);
@@ -237,6 +237,7 @@ out_pcaps_pattrs:
while (--i >= 0) {
kfree(pcaps[i].pattrs);
kfree(pcaps[i].pg.attrs);
+ kfree(pcaps[i].pg.name);
}
kobject_put(powercap_kobj);
out_pcaps:
diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
index f7d04b6a2d7a..179609220e6f 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
@@ -214,9 +214,9 @@ void __init opal_sensor_groups_init(void)
}
if (!of_property_read_u32(node, "ibm,chip-id", &chipid))
- sprintf(sgs[i].name, "%s%d", node->name, chipid);
+ sprintf(sgs[i].name, "%pOFn%d", node, chipid);
else
- sprintf(sgs[i].name, "%s", node->name);
+ sprintf(sgs[i].name, "%pOFn", node);
sgs[i].sg.name = sgs[i].name;
if (add_attr_group(ops, len, &sgs[i], sgid)) {
diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c
index 9aa87df114fd..916a4b7b1bb5 100644
--- a/arch/powerpc/platforms/powernv/opal-sysparam.c
+++ b/arch/powerpc/platforms/powernv/opal-sysparam.c
@@ -194,7 +194,7 @@ void __init opal_sys_param_init(void)
count = of_property_count_strings(sysparam, "param-name");
if (count < 0) {
pr_err("SYSPARAM: No string found of property param-name in "
- "the node %s\n", sysparam->name);
+ "the node %pOFn\n", sysparam);
goto out_param_buf;
}
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 38fe4087484a..a4641515956f 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -535,7 +535,7 @@ static int opal_recover_mce(struct pt_regs *regs,
return recovered;
}
-void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
+void __noreturn pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
{
panic_flush_kmsg_start();
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index adddde023622..14befee4b3f1 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -219,17 +219,41 @@ static void pnv_prepare_going_down(void)
static void __noreturn pnv_restart(char *cmd)
{
- long rc = OPAL_BUSY;
+ long rc;
pnv_prepare_going_down();
- while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
- rc = opal_cec_reboot();
- if (rc == OPAL_BUSY_EVENT)
- opal_poll_events(NULL);
+ do {
+ if (!cmd)
+ rc = opal_cec_reboot();
+ else if (strcmp(cmd, "full") == 0)
+ rc = opal_cec_reboot2(OPAL_REBOOT_FULL_IPL, NULL);
else
+ rc = OPAL_UNSUPPORTED;
+
+ if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
+ /* Opal is busy wait for some time and retry */
+ opal_poll_events(NULL);
mdelay(10);
- }
+
+ } else if (cmd && rc) {
+ /* Unknown error while issuing reboot */
+ if (rc == OPAL_UNSUPPORTED)
+ pr_err("Unsupported '%s' reboot.\n", cmd);
+ else
+ pr_err("Unable to issue '%s' reboot. Err=%ld\n",
+ cmd, rc);
+ pr_info("Forcing a cec-reboot\n");
+ cmd = NULL;
+ rc = OPAL_BUSY;
+
+ } else if (rc != OPAL_SUCCESS) {
+ /* Unknown error while issuing cec-reboot */
+ pr_err("Unable to reboot. Err=%ld\n", rc);
+ }
+
+ } while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT);
+
for (;;)
opal_poll_events(NULL);
}
@@ -437,6 +461,16 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu)
return ret_freq;
}
+static long pnv_machine_check_early(struct pt_regs *regs)
+{
+ long handled = 0;
+
+ if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
+ handled = cur_cpu_spec->machine_check_early(regs);
+
+ return handled;
+}
+
define_machine(powernv) {
.name = "PowerNV",
.probe = pnv_probe,
@@ -448,6 +482,7 @@ define_machine(powernv) {
.machine_shutdown = pnv_shutdown,
.power_save = NULL,
.calibrate_decr = generic_calibrate_decr,
+ .machine_check_early = pnv_machine_check_early,
#ifdef CONFIG_KEXEC_CORE
.kexec_cpu_down = pnv_kexec_cpu_down,
#endif