From 83a092cf95f28696ddc36c8add0cf03ac034897f Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Fri, 12 May 2017 03:40:40 +1000
Subject: powerpc: Link warning for orphan sections

Add --orphan-handling=warn to final link flags. This ensures we can handle
all sections explicitly. This would have caught subtle breakage such as
7de3b27bac47da9de08409df1d69664acbb72197 at build-time.

Also bring existing orphan sections into the fold:
- .text.hot and .text.unlikely are compiler generated sections.
- .sdata2, .dynsbss, .plt are used by PPC32
- We previously did not specify DWARF_DEBUG or STABS_DEBUG
- DWARF_DEBUG did not include all DWARF sections that can be emitted
- A number of sections are unused and can be discarded.

Signed-off-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
---
 include/asm-generic/vmlinux.lds.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 314a0b9219c6..9862afb3ae05 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -595,6 +595,7 @@
 #define SBSS(sbss_align)					\
 	. = ALIGN(sbss_align);					\
 	.sbss : AT(ADDR(.sbss) - LOAD_OFFSET) {			\
+		*(.dynsbss)					\
 		*(.sbss)					\
 		*(.scommon)					\
 	}
@@ -641,11 +642,22 @@
 	.debug_str 0 : { *(.debug_str) }			\
 	.debug_loc 0 : { *(.debug_loc) }			\
 	.debug_macinfo 0 : { *(.debug_macinfo) }		\
+	.debug_pubtypes 0 : { *(.debug_pubtypes) }		\
+	/* DWARF 3 */						\
+	.debug_ranges 0 : { *(.debug_ranges) }			\
 	/* SGI/MIPS DWARF 2 extensions */			\
 	.debug_weaknames 0 : { *(.debug_weaknames) }		\
 	.debug_funcnames 0 : { *(.debug_funcnames) }		\
 	.debug_typenames 0 : { *(.debug_typenames) }		\
 	.debug_varnames  0 : { *(.debug_varnames) }		\
+	/* GNU DWARF 2 extensions */				\
+	.debug_gnu_pubnames 0 : { *(.debug_gnu_pubnames) }	\
+	.debug_gnu_pubtypes 0 : { *(.debug_gnu_pubtypes) }	\
+	/* DWARF 4 */						\
+	.debug_types 0 : { *(.debug_types) }			\
+	/* DWARF 5 */						\
+	.debug_macro 0 : { *(.debug_macro) }			\
+	.debug_addr 0 : { *(.debug_addr) }

 	/* Stabs debugging sections. */
 #define STABS_DEBUG
--
cgit v1.2.3
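The --orphan-handling=warn flag itself belongs in the architecture's final
link flags rather than in this header. A minimal kbuild sketch of where it
might go (illustration only; the exact Makefile location and use of the
ld-option helper are assumptions, since the arch-side change is not part of
this excerpt):

  # arch/powerpc/Makefile (hypothetical placement, not part of this patch)
  # Warn at final link about any input section not placed explicitly by the
  # linker script; ld-option guards against linkers without the flag.
  LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn)
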
From fd851a3cdc196bfc1d229b5f22369069af532bf8 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Mon, 29 May 2017 12:22:23 +1000
Subject: spin loop primitives for busy waiting

Current busy-wait loops are implemented by repeatedly calling cpu_relax(),
which gives the architecture a low-latency way to improve power usage
and/or reduce SMT resource contention.

This poses some difficulties for powerpc, which has SMT priority setting
instructions (priorities determine how ifetch cycles are apportioned).
powerpc's cpu_relax() is implemented by setting a low priority then setting
normal priority. This has several problems:

- Changing thread priority can have some execution cost and potential
  impact on other threads in the core. It is inefficient to pay that cost
  on every iteration of a busy-wait loop.

- Depending on implementation details, a `low ; medium` sequence may not
  have much if any effect. Some software with a similar pattern actually
  inserts a lot of nops between the two, in order to spend a few fetch
  cycles at the low priority.

- The busy-wait loop itself runs with regular priority. This might only be
  a few fetch cycles, but if there are several threads running such loops,
  they could cause a noticeable impact on a non-idle thread.

Implement spin_begin and spin_end primitives that can be used around
busy-wait loops and default to no-ops, and spin_cpu_relax, which defaults
to cpu_relax().

This will allow architectures to hook the entry and exit of busy-wait
loops, and will allow powerpc to set low SMT priority at entry and normal
priority at exit.

Suggested-by: Linus Torvalds
Signed-off-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
---
 include/linux/processor.h | 70 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 include/linux/processor.h

(limited to 'include')

diff --git a/include/linux/processor.h b/include/linux/processor.h
new file mode 100644
index 000000000000..da0c5e56ca02
--- /dev/null
+++ b/include/linux/processor.h
@@ -0,0 +1,70 @@
+/* Misc low level processor primitives */
+#ifndef _LINUX_PROCESSOR_H
+#define _LINUX_PROCESSOR_H
+
+#include <asm/processor.h>
+
+/*
+ * spin_begin is used before beginning a busy-wait loop, and must be paired
+ * with spin_end when the loop is exited. spin_cpu_relax must be called
+ * within the loop.
+ *
+ * The loop body should be as small and fast as possible, on the order of
+ * tens of instructions/cycles as a guide. It should avoid calling
+ * cpu_relax, or any "spin" or sleep type of primitive including nested uses
+ * of these primitives. It should not lock or take any other resource.
+ * Violations of these guidelines will not cause a bug, but may cause
+ * sub-optimal performance.
+ *
+ * These loops are optimized to be used where wait times are expected to be
+ * less than the cost of a context switch (and associated overhead).
+ *
+ * Detection of resource owner and decision to spin or sleep or guest-yield
+ * (e.g., spin lock holder vcpu preempted, or mutex owner not on CPU) can be
+ * tested within the loop body.
+ */
+#ifndef spin_begin
+#define spin_begin()
+#endif
+
+#ifndef spin_cpu_relax
+#define spin_cpu_relax() cpu_relax()
+#endif
+
+/*
+ * spin_cpu_yield may be called to yield (undirected) to the hypervisor if
+ * necessary. This should be used if the wait is expected to take longer
+ * than context switch overhead, but we can't sleep or do a directed yield.
+ */
+#ifndef spin_cpu_yield
+#define spin_cpu_yield() cpu_relax_yield()
+#endif
+
+#ifndef spin_end
+#define spin_end()
+#endif
+
+/*
+ * spin_until_cond can be used to wait for a condition to become true. It
+ * may be expected that the first iteration will be true in the common case
+ * (no spinning), so callers should not need a first "likely" test for the
+ * uncontended case before using this primitive.
+ *
+ * Usage and implementation guidelines are the same as for the spin_begin
+ * primitives, above.
+ */
+#ifndef spin_until_cond
+#define spin_until_cond(cond)					\
+do {								\
+	if (unlikely(!(cond))) {				\
+		spin_begin();					\
+		do {						\
+			spin_cpu_relax();			\
+		} while (!(cond));				\
+		spin_end();					\
+	}							\
+} while (0)
+
+#endif
+
+#endif /* _LINUX_PROCESSOR_H */
--
cgit v1.2.3
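For a sense of how these primitives are meant to be used, here is a minimal
caller sketch (illustrative only, not part of the patch; the wait_for_go()
helpers and the atomic flag are hypothetical, and the usual <linux/atomic.h>
include is assumed):

/* Hypothetical caller: briefly wait for another CPU to set a flag. */
static void wait_for_go(atomic_t *go)
{
	/* Tests the condition once before spinning; no prior likely() needed. */
	spin_until_cond(atomic_read(go) != 0);
}

/* Open-coded equivalent, mirroring what spin_until_cond does internally,
 * useful when the loop body has to do extra work per iteration. */
static void wait_for_go_open_coded(atomic_t *go)
{
	if (likely(atomic_read(go)))
		return;

	spin_begin();
	while (!atomic_read(go))
		spin_cpu_relax();
	spin_end();
}

On powerpc the intent is that spin_begin()/spin_end() drop to low SMT
priority once and restore it once, rather than toggling priority on every
pass the way cpu_relax() does.
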
From 3ced8d73006321bd2a0412fa0ff4b065a02e7514 Mon Sep 17 00:00:00 2001
From: Christophe Lombard
Date: Thu, 22 Jun 2017 15:07:27 +0200
Subject: cxl: Export library to support IBM XSL

This patch exports an in-kernel 'library' API which can be called by other
drivers to help interact with an IBM XSL on a POWER9 system.

The XSL (Translation Service Layer) is a stripped-down version of the PSL
(Power Service Layer) used in some cards such as the Mellanox CX5. Like the
PSL, it implements the CAIA architecture, but has a number of differences,
mostly in its implementation-dependent registers.

The XSL also uses a special DMA cxl mode, which uses a slightly different
init sequence for the CAPP and PHB.

Signed-off-by: Andrew Donnellan
Signed-off-by: Christophe Lombard
Acked-by: Frederic Barrat
Signed-off-by: Michael Ellerman
---
 include/misc/cxllib.h | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)
 create mode 100644 include/misc/cxllib.h

(limited to 'include')

diff --git a/include/misc/cxllib.h b/include/misc/cxllib.h
new file mode 100644
index 000000000000..e5aa29f019a6
--- /dev/null
+++ b/include/misc/cxllib.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2017 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _MISC_CXLLIB_H
+#define _MISC_CXLLIB_H
+
+#include
+#include
+
+/*
+ * The cxl driver exports an in-kernel 'library' API which can be called by
+ * other drivers to help interact with an IBM XSL.
+ */
+
+/*
+ * Tells whether capi is supported on the PCIe slot where the
+ * device is seated
+ *
+ * Input:
+ *	dev: device whose slot needs to be checked
+ *	flags: 0 for the time being
+ */
+bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags);
+
+
+/*
+ * Returns the configuration parameters to be used by the XSL or device
+ *
+ * Input:
+ *	dev: device, used to find PHB
+ * Output:
+ *	struct cxllib_xsl_config:
+ *		version
+ *		capi BAR address, i.e. 0x2000000000000-0x2FFFFFFFFFFFF
+ *		capi BAR size
+ *		data send control (XSL_DSNCTL)
+ *		dummy read address (XSL_DRA)
+ */
+#define CXL_XSL_CONFIG_VERSION1	1
+struct cxllib_xsl_config {
+	u32	version;	/* format version for register encoding */
+	u32	log_bar_size;	/* log size of the capi_window */
+	u64	bar_addr;	/* address of the start of capi window */
+	u64	dsnctl;		/* matches definition of XSL_DSNCTL */
+	u64	dra;		/* real address that can be used for dummy read */
+};
+
+int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg);
+
+
+/*
+ * Activate capi for the pci host bridge associated with the device.
+ * Can be extended to deactivate once we know how to do it.
+ * Device must be ready to accept messages from the CAPP unit and
+ * respond accordingly (TLB invalidates, ...)
+ *
+ * PHB is switched to capi mode through calls to skiboot.
+ * CAPP snooping is activated
+ *
+ * Input:
+ *	dev: device whose PHB should switch mode
+ *	mode: mode to switch to i.e. CAPI or PCI
+ *	flags: options related to the mode
+ */
+enum cxllib_mode {
+	CXL_MODE_CXL,
+	CXL_MODE_PCI,
+};
+
+#define CXL_MODE_NO_DMA		0
+#define CXL_MODE_DMA_TVT0	1
+#define CXL_MODE_DMA_TVT1	2
+
+int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
+			unsigned long flags);
+
+
+/*
+ * Set the device for capi DMA.
+ * Define its dma_ops and dma offset so that allocations will be using TVT#1
+ *
+ * Input:
+ *	dev: device to set
+ *	flags: options. CXL_MODE_DMA_TVT1 should be used
+ */
+int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags);
+
+
+/*
+ * Get the Process Element structure for the given thread
+ *
+ * Input:
+ *	task: task_struct for the context of the translation
+ *	translation_mode: whether addresses should be translated
+ * Output:
+ *	attr: attributes to fill up the Process Element structure from CAIA
+ */
+struct cxllib_pe_attributes {
+	u64 sr;
+	u32 lpid;
+	u32 tid;
+	u32 pid;
+};
+#define CXL_TRANSLATED_MODE 0
+#define CXL_REAL_MODE 1
+
+int cxllib_get_PE_attributes(struct task_struct *task,
+		unsigned long translation_mode, struct cxllib_pe_attributes *attr);
+
+
+/*
+ * Handle memory fault.
+ * Fault in all the pages of the specified buffer for the permissions
+ * provided in 'flags'
+ *
+ * Shouldn't be called from interrupt context
+ *
+ * Input:
+ *	mm: struct mm for the thread faulting the pages
+ *	addr: base address of the buffer to page in
+ *	size: size of the buffer to page in
+ *	flags: permission requested (DSISR_ISSTORE...)
+ */
+int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags);
+
+
+#endif /* _MISC_CXLLIB_H */
--
cgit v1.2.3
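To make the intended calling sequence concrete, here is a rough sketch of how
an XSL driver's probe path might use this library (illustrative only; the
function name, the error handling, and the choice of CXL_MODE_DMA_TVT1 as the
flags value are assumptions, not taken from this patch):

/* Hypothetical XSL driver setup path, for illustration only. */
static int example_xsl_setup(struct pci_dev *dev)
{
	struct cxllib_xsl_config cfg;
	int rc;

	/* Is the slot behind this device capi-capable at all? */
	if (!cxllib_slot_is_supported(dev, 0))
		return -ENODEV;

	/* Fetch the parameters the XSL needs (capi BAR, XSL_DSNCTL, XSL_DRA). */
	rc = cxllib_get_xsl_config(dev, &cfg);
	if (rc)
		return rc;

	/* Switch the PHB to capi mode; CAPP snooping is enabled via skiboot. */
	rc = cxllib_switch_phb_mode(dev, CXL_MODE_CXL, CXL_MODE_DMA_TVT1);
	if (rc)
		return rc;

	/* Route the device's DMA through TVT#1, per the comments above. */
	return cxllib_set_device_dma(dev, CXL_MODE_DMA_TVT1);
}

cxllib_get_PE_attributes() and cxllib_handle_fault() would then be used
per-context, when attaching a task and when the device reports a
translation fault.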