From 6855e95ce3256cdb5f4fbc988fe8ee925b051ef7 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Wed, 28 Aug 2013 19:48:15 +0000
Subject: arc: Replace __get_cpu_var uses

__get_cpu_var() is used for multiple purposes in the kernel source. One of them is
address calculation via the form &__get_cpu_var(x). This calculates the address for
the instance of the percpu variable of the current processor based on an offset.

Other use cases are for storing and retrieving data from the current processor's percpu area.
__get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment.

__get_cpu_var() is defined as :

#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))

__get_cpu_var() always only does an address determination. However, store and retrieve operations
could use a segment prefix (or global register on other platforms) to avoid the address calculation.

this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use
optimized assembly code to read and write per cpu variables.

This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr()
or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided
and fewer registers are used when code is generated.

At the end of the patchset all uses of __get_cpu_var have been removed so the macro is removed too.

The patchset includes passes over all arches as well. Once these operations are used throughout then
specialized macros can be defined in non-x86 arches as well in order to optimize per cpu access by
f.e. using a global register that may be set to the per cpu base.

Transformations done to __get_cpu_var()

1. Determine the address of the percpu instance of the current processor.

	DEFINE_PER_CPU(int, y);
	int *x = &__get_cpu_var(y);

    Converts to

	int *x = this_cpu_ptr(&y);

2. Same as #1 but this time an array structure is involved.

	DEFINE_PER_CPU(int, y[20]);
	int *x = __get_cpu_var(y);

    Converts to

	int *x = this_cpu_ptr(y);

3. Retrieve the content of the current processor's instance of a per cpu variable.

	DEFINE_PER_CPU(int, y);
	int x = __get_cpu_var(y);

   Converts to

	int x = __this_cpu_read(y);

4. Retrieve the content of a percpu struct

	DEFINE_PER_CPU(struct mystruct, y);
	struct mystruct x = __get_cpu_var(y);

   Converts to

	memcpy(&x, this_cpu_ptr(&y), sizeof(x));

5. Assignment to a per cpu variable

	DEFINE_PER_CPU(int, y);
	__get_cpu_var(y) = x;

   Converts to

	__this_cpu_write(y, x);

6. Increment/Decrement etc of a per cpu variable

	DEFINE_PER_CPU(int, y);
	__get_cpu_var(y)++

   Converts to

	__this_cpu_inc(y)

Acked-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Christoph Lameter <cl@linux.com>
---
 arch/arc/kernel/kprobes.c | 6 +++---
 arch/arc/kernel/time.c    | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/kernel/kprobes.c b/arch/arc/kernel/kprobes.c
index 72f97822784a..eb1c2ee5eaf0 100644
--- a/arch/arc/kernel/kprobes.c
+++ b/arch/arc/kernel/kprobes.c
@@ -87,13 +87,13 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
 
 static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
-	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+	__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
 	kcb->kprobe_status = kcb->prev_kprobe.status;
 }
 
 static inline void __kprobes set_current_kprobe(struct kprobe *p)
 {
-	__get_cpu_var(current_kprobe) = p;
+	__this_cpu_write(current_kprobe, p);
 }
 
 static void __kprobes resume_execution(struct kprobe *p, unsigned long addr,
@@ -237,7 +237,7 @@ int __kprobes arc_kprobe_handler(unsigned long addr, struct pt_regs *regs)
 
 		return 1;
 	} else if (kprobe_running()) {
-		p = __get_cpu_var(current_kprobe);
+		p = __this_cpu_read(current_kprobe);
 		if (p->break_handler && p->break_handler(p, regs)) {
 			setup_singlestep(p, regs);
 			kcb->kprobe_status = KPROBE_HIT_SS;
diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c
index 3fde7de3ea67..4c21dde2f6a9 100644
--- a/arch/arc/kernel/time.c
+++ b/arch/arc/kernel/time.c
@@ -206,7 +206,7 @@ static DEFINE_PER_CPU(struct clock_event_device, arc_clockevent_device) = {
 
 static irqreturn_t timer_irq_handler(int irq, void *dev_id)
 {
-	struct clock_event_device *clk = &__get_cpu_var(arc_clockevent_device);
+	struct clock_event_device *clk = this_cpu_ptr(&arc_clockevent_device);
 
 	arc_timer_event_ack(clk->mode == CLOCK_EVT_MODE_PERIODIC);
 	clk->event_handler(clk);
-- 
cgit v1.2.3


From 8e457d6a75421372c7721388c46d21381fd5b451 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Wed, 4 Sep 2013 16:13:35 +0530
Subject: ARC: Annotate some functions as static

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/setup.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 2c68bc7e6a78..d9e15f16633e 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -37,8 +37,7 @@ struct task_struct *_current_task[NR_CPUS];	/* For stack switching */
 
 struct cpuinfo_arc cpuinfo_arc700[NR_CPUS];
 
-
-void read_arc_build_cfg_regs(void)
+static void read_arc_build_cfg_regs(void)
 {
 	struct bcr_perip uncached_space;
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
@@ -106,7 +105,7 @@ static const struct cpuinfo_data arc_cpu_tbl[] = {
 	{ {0x00, NULL		} }
 };
 
-char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
+static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
 {
 	int n = 0;
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
@@ -171,7 +170,7 @@ static const struct id_to_str mac_mul_nm[] = {
 	{0x6, "Dual 16x16 and 32x16"}
 };
 
-char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
+static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
 {
 	int n = 0;
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
@@ -234,7 +233,7 @@ char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
 	return buf;
 }
 
-void arc_chk_ccms(void)
+static void arc_chk_ccms(void)
 {
 #if defined(CONFIG_ARC_HAS_DCCM) || defined(CONFIG_ARC_HAS_ICCM)
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
@@ -269,7 +268,7 @@ void arc_chk_ccms(void)
  * hardware has dedicated regs which need to be saved/restored on ctx-sw
  * (Single Precision uses core regs), thus kernel is kind of oblivious to it
  */
-void arc_chk_fpu(void)
+static void arc_chk_fpu(void)
 {
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
 
-- 
cgit v1.2.3


From 064a6269245655b15054ad9783bca9bfd8dc1f9e Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 31 Oct 2013 13:53:54 +0530
Subject: ARC: use __weak instead of __attribute__((weak))

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/smp.c  | 2 +-
 arch/arc/kernel/time.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index bca3052c956d..7f0ab1ecd640 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -95,7 +95,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
  *        If it turns out to be elaborate, it's better to code it in assembly
  *
  */
-void __attribute__((weak)) arc_platform_smp_wait_to_boot(int cpu)
+void __weak arc_platform_smp_wait_to_boot(int cpu)
 {
 	/*
 	 * As a hack for debugging - since debugger will single-step over the
diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c
index 4c21dde2f6a9..0a9b6b289c4f 100644
--- a/arch/arc/kernel/time.c
+++ b/arch/arc/kernel/time.c
@@ -223,7 +223,7 @@ static struct irqaction arc_timer_irq = {
  * Setup the local event timer for @cpu
  * N.B. weak so that some exotic ARC SoCs can completely override it
  */
-void __attribute__((weak)) arc_local_timer_setup(unsigned int cpu)
+void __weak arc_local_timer_setup(unsigned int cpu)
 {
 	struct clock_event_device *clk = &per_cpu(arc_clockevent_device, cpu);
 
-- 
cgit v1.2.3


From f3e4de327403cee6f76c0dca1b45d6fb0b08daf4 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 5 Sep 2013 13:03:35 +0530
Subject: ARC: cacheflush refactor #1: push aux reg ascertaining into leaf
 routine

ARC dcache supports 3 ops - Inv, Flush, Flush-n-Inv.
The programming model however provides 2 commands FLUSH, INV.
INV will either discard or flush-n-discard (based on DC_CTRL bit)

The leaf helper __dc_line_loop() used to take the AUX register
(corresponding to the 2 commands). Now we push that to within the
helper, paving way for code consolidations to follow.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/mm/cache_arc700.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index 5a1259cd948c..2787e5a2c306 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -296,8 +296,10 @@ static inline void __dc_entire_op(const int cacheop)
  * (aliasing VIPT dcache flushing needs both vaddr and paddr)
  */
 static inline void __dc_line_loop(unsigned long paddr, unsigned long vaddr,
-				  unsigned long sz, const int aux_reg)
+				  unsigned long sz, const int cacheop)
 {
+	/* which MMU cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
+	const int aux = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
 	int num_lines;
 
 	/* Ensure we properly floor/ceil the non-line aligned/sized requests
@@ -326,11 +328,11 @@ static inline void __dc_line_loop(unsigned long paddr, unsigned long vaddr,
 		 */
 		write_aux_reg(ARC_REG_DC_PTAG, paddr);
 
-		write_aux_reg(aux_reg, vaddr);
+		write_aux_reg(aux, vaddr);
 		vaddr += ARC_DCACHE_LINE_LEN;
 #else
 		/* paddr contains stuffed vaddrs bits */
-		write_aux_reg(aux_reg, paddr);
+		write_aux_reg(aux, paddr);
 #endif
 		paddr += ARC_DCACHE_LINE_LEN;
 	}
@@ -346,7 +348,6 @@ static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr,
 				unsigned long sz, const int cacheop)
 {
 	unsigned long flags, tmp = tmp;
-	int aux;
 
 	local_irq_save(flags);
 
@@ -361,12 +362,7 @@ static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr,
 		write_aux_reg(ARC_REG_DC_CTRL, tmp | DC_CTRL_INV_MODE_FLUSH);
 	}
 
-	if (cacheop & OP_INV)	/* Inv / flush-n-inv use same cmd reg */
-		aux = ARC_REG_DC_IVDL;
-	else
-		aux = ARC_REG_DC_FLDL;
-
-	__dc_line_loop(paddr, vaddr, sz, aux);
+	__dc_line_loop(paddr, vaddr, sz, cacheop);
 
 	if (cacheop & OP_FLUSH)	/* flush / flush-n-inv both wait */
 		wait_for_flush();
-- 
cgit v1.2.3


From 63d2dfdbf4b12a6993adf5005fd308d611d453d6 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 5 Sep 2013 13:17:49 +0530
Subject: ARC: cacheflush refactor #2: I and D caches lines to have same size

Having them be different seems an obscure configuration.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/cache.h |  8 +-------
 arch/arc/mm/cache_arc700.c   | 30 +++++++++++++++---------------
 2 files changed, 16 insertions(+), 22 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index e4abdaac6f9f..2fd3162ec4df 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -17,13 +17,7 @@
 #endif
 
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
-
-/* For a rare case where customers have differently config I/D */
-#define ARC_ICACHE_LINE_LEN	L1_CACHE_BYTES
-#define ARC_DCACHE_LINE_LEN	L1_CACHE_BYTES
-
-#define ICACHE_LINE_MASK	(~(ARC_ICACHE_LINE_LEN - 1))
-#define DCACHE_LINE_MASK	(~(ARC_DCACHE_LINE_LEN - 1))
+#define CACHE_LINE_MASK		(~(L1_CACHE_BYTES - 1))
 
 /*
  * ARC700 doesn't cache any access in top 256M.
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index 2787e5a2c306..0be5b2075701 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -182,7 +182,7 @@ void arc_cache_init(void)
 
 #ifdef CONFIG_ARC_HAS_ICACHE
 	/* 1. Confirm some of I-cache params which Linux assumes */
-	if (ic->line_len != ARC_ICACHE_LINE_LEN)
+	if (ic->line_len != L1_CACHE_BYTES)
 		panic("Cache H/W doesn't match kernel Config");
 
 	if (ic->ver != CONFIG_ARC_MMU_VER)
@@ -205,7 +205,7 @@ chk_dc:
 		return;
 
 #ifdef CONFIG_ARC_HAS_DCACHE
-	if (dc->line_len != ARC_DCACHE_LINE_LEN)
+	if (dc->line_len != L1_CACHE_BYTES)
 		panic("Cache H/W doesn't match kernel Config");
 
 	/* check for D-Cache aliasing */
@@ -298,7 +298,7 @@ static inline void __dc_entire_op(const int cacheop)
 static inline void __dc_line_loop(unsigned long paddr, unsigned long vaddr,
 				  unsigned long sz, const int cacheop)
 {
-	/* which MMU cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
+	/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
 	const int aux = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
 	int num_lines;
 
@@ -309,12 +309,12 @@ static inline void __dc_line_loop(unsigned long paddr, unsigned long vaddr,
 	 *  -@sz will be integral multiple of line size (being page sized).
 	 */
 	if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
-		sz += paddr & ~DCACHE_LINE_MASK;
-		paddr &= DCACHE_LINE_MASK;
-		vaddr &= DCACHE_LINE_MASK;
+		sz += paddr & ~CACHE_LINE_MASK;
+		paddr &= CACHE_LINE_MASK;
+		vaddr &= CACHE_LINE_MASK;
 	}
 
-	num_lines = DIV_ROUND_UP(sz, ARC_DCACHE_LINE_LEN);
+	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
 
 #if (CONFIG_ARC_MMU_VER <= 2)
 	paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
@@ -329,12 +329,12 @@ static inline void __dc_line_loop(unsigned long paddr, unsigned long vaddr,
 		write_aux_reg(ARC_REG_DC_PTAG, paddr);
 
 		write_aux_reg(aux, vaddr);
-		vaddr += ARC_DCACHE_LINE_LEN;
+		vaddr += L1_CACHE_BYTES;
 #else
 		/* paddr contains stuffed vaddrs bits */
 		write_aux_reg(aux, paddr);
 #endif
-		paddr += ARC_DCACHE_LINE_LEN;
+		paddr += L1_CACHE_BYTES;
 	}
 }
 
@@ -443,12 +443,12 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr,
 	 *  -@sz will be integral multiple of line size (being page sized).
 	 */
 	if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
-		sz += paddr & ~ICACHE_LINE_MASK;
-		paddr &= ICACHE_LINE_MASK;
-		vaddr &= ICACHE_LINE_MASK;
+		sz += paddr & ~CACHE_LINE_MASK;
+		paddr &= CACHE_LINE_MASK;
+		vaddr &= CACHE_LINE_MASK;
 	}
 
-	num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
+	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
 
 #if (CONFIG_ARC_MMU_VER <= 2)
 	/* bits 17:13 of vaddr go as bits 4:0 of paddr */
@@ -463,12 +463,12 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr,
 
 		/* index bits come from vaddr */
 		write_aux_reg(ARC_REG_IC_IVIL, vaddr);
-		vaddr += ARC_ICACHE_LINE_LEN;
+		vaddr += L1_CACHE_BYTES;
 #else
 		/* paddr contains stuffed vaddrs bits */
 		write_aux_reg(ARC_REG_IC_IVIL, paddr);
 #endif
-		paddr += ARC_ICACHE_LINE_LEN;
+		paddr += L1_CACHE_BYTES;
 	}
 	local_irq_restore(flags);
 }
-- 
cgit v1.2.3


From bd12976c366486ea90aebd83f1cf2863ee47c76a Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 5 Sep 2013 13:43:03 +0530
Subject: ARC: cacheflush refactor #3: Unify the {d,i}cache flush leaf helpers

With Line length being constant now, we can fold the 2 helpers into 1.
This allows applying any optimizations (forthcoming) to single place.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/mm/cache_arc700.c | 139 ++++++++++++++++++---------------------------
 1 file changed, 55 insertions(+), 84 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index 0be5b2075701..a152f3263ac0 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -240,6 +240,59 @@ chk_dc:
 #define OP_INV		0x1
 #define OP_FLUSH	0x2
 #define OP_FLUSH_N_INV	0x3
+#define OP_INV_IC	0x4
+
+/*
+ * Common Helper for Line Operations on {I,D}-Cache
+ */
+static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
+				     unsigned long sz, const int cacheop)
+{
+	unsigned int aux_cmd, aux_tag;
+	int num_lines;
+
+	if (cacheop == OP_INV_IC) {
+		aux_cmd = ARC_REG_IC_IVIL;
+		aux_tag = ARC_REG_IC_PTAG;
+	}
+	else {
+		/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
+		aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
+		aux_tag = ARC_REG_DC_PTAG;
+	}
+
+	/* Ensure we properly floor/ceil the non-line aligned/sized requests
+	 * and have @paddr - aligned to cache line and integral @num_lines.
+	 * This however can be avoided for page sized since:
+	 *  -@paddr will be cache-line aligned already (being page aligned)
+	 *  -@sz will be integral multiple of line size (being page sized).
+	 */
+	if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
+		sz += paddr & ~CACHE_LINE_MASK;
+		paddr &= CACHE_LINE_MASK;
+		vaddr &= CACHE_LINE_MASK;
+	}
+
+	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
+
+#if (CONFIG_ARC_MMU_VER <= 2)
+	/* MMUv2 and before: paddr contains stuffed vaddrs bits */
+	paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
+#endif
+
+	while (num_lines-- > 0) {
+#if (CONFIG_ARC_MMU_VER > 2)
+		/* MMUv3, cache ops require paddr seperately */
+		write_aux_reg(ARC_REG_DC_PTAG, paddr);
+
+		write_aux_reg(aux_cmd, vaddr);
+		vaddr += L1_CACHE_BYTES;
+#else
+		write_aux_reg(aux, paddr);
+#endif
+		paddr += L1_CACHE_BYTES;
+	}
+}
 
 #ifdef CONFIG_ARC_HAS_DCACHE
 
@@ -289,55 +342,6 @@ static inline void __dc_entire_op(const int cacheop)
 		write_aux_reg(ARC_REG_DC_CTRL, tmp & ~DC_CTRL_INV_MODE_FLUSH);
 }
 
-/*
- * Per Line Operation on D-Cache
- * Doesn't deal with type-of-op/IRQ-disabling/waiting-for-flush-to-complete
- * It's sole purpose is to help gcc generate ZOL
- * (aliasing VIPT dcache flushing needs both vaddr and paddr)
- */
-static inline void __dc_line_loop(unsigned long paddr, unsigned long vaddr,
-				  unsigned long sz, const int cacheop)
-{
-	/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
-	const int aux = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
-	int num_lines;
-
-	/* Ensure we properly floor/ceil the non-line aligned/sized requests
-	 * and have @paddr - aligned to cache line and integral @num_lines.
-	 * This however can be avoided for page sized since:
-	 *  -@paddr will be cache-line aligned already (being page aligned)
-	 *  -@sz will be integral multiple of line size (being page sized).
-	 */
-	if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
-		sz += paddr & ~CACHE_LINE_MASK;
-		paddr &= CACHE_LINE_MASK;
-		vaddr &= CACHE_LINE_MASK;
-	}
-
-	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
-
-#if (CONFIG_ARC_MMU_VER <= 2)
-	paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
-#endif
-
-	while (num_lines-- > 0) {
-#if (CONFIG_ARC_MMU_VER > 2)
-		/*
-		 * Just as for I$, in MMU v3, D$ ops also require
-		 * "tag" bits in DC_PTAG, "index" bits in FLDL,IVDL ops
-		 */
-		write_aux_reg(ARC_REG_DC_PTAG, paddr);
-
-		write_aux_reg(aux, vaddr);
-		vaddr += L1_CACHE_BYTES;
-#else
-		/* paddr contains stuffed vaddrs bits */
-		write_aux_reg(aux, paddr);
-#endif
-		paddr += L1_CACHE_BYTES;
-	}
-}
-
 /* For kernel mappings cache operation: index is same as paddr */
 #define __dc_line_op_k(p, sz, op)	__dc_line_op(p, p, sz, op)
 
@@ -362,7 +366,7 @@ static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr,
 		write_aux_reg(ARC_REG_DC_CTRL, tmp | DC_CTRL_INV_MODE_FLUSH);
 	}
 
-	__dc_line_loop(paddr, vaddr, sz, cacheop);
+	__cache_line_loop(paddr, vaddr, sz, cacheop);
 
 	if (cacheop & OP_FLUSH)	/* flush / flush-n-inv both wait */
 		wait_for_flush();
@@ -434,42 +438,9 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr,
 				unsigned long sz)
 {
 	unsigned long flags;
-	int num_lines;
-
-	/*
-	 * Ensure we properly floor/ceil the non-line aligned/sized requests:
-	 * However page sized flushes can be compile time optimised.
-	 *  -@paddr will be cache-line aligned already (being page aligned)
-	 *  -@sz will be integral multiple of line size (being page sized).
-	 */
-	if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
-		sz += paddr & ~CACHE_LINE_MASK;
-		paddr &= CACHE_LINE_MASK;
-		vaddr &= CACHE_LINE_MASK;
-	}
-
-	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
-
-#if (CONFIG_ARC_MMU_VER <= 2)
-	/* bits 17:13 of vaddr go as bits 4:0 of paddr */
-	paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
-#endif
 
 	local_irq_save(flags);
-	while (num_lines-- > 0) {
-#if (CONFIG_ARC_MMU_VER > 2)
-		/* tag comes from phy addr */
-		write_aux_reg(ARC_REG_IC_PTAG, paddr);
-
-		/* index bits come from vaddr */
-		write_aux_reg(ARC_REG_IC_IVIL, vaddr);
-		vaddr += L1_CACHE_BYTES;
-#else
-		/* paddr contains stuffed vaddrs bits */
-		write_aux_reg(ARC_REG_IC_IVIL, paddr);
-#endif
-		paddr += L1_CACHE_BYTES;
-	}
+	__cache_line_loop(paddr, vaddr, sz, OP_INV_IC);
 	local_irq_restore(flags);
 }
 
-- 
cgit v1.2.3


From d4599baf5c773660f32ee6bc35c1afab009a52d9 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 5 Sep 2013 14:45:51 +0530
Subject: ARC: cacheflush optim - PTAG can be loop invariant if V-P is const

Line op needs vaddr (indexing) and paddr (tag match). For page sized
flushes (V-P const), each line op will need a different index, but the
tag bits will remain constant, hence paddr can be setup once outside the
loop.

This improves select LMBench numbers for Aliasing dcache where we have
more "preventive" cache flushing.

Processor, Processes - times in microseconds - smaller is better
------------------------------------------------------------------------------
Host                 OS  Mhz null null      open slct sig  sig  fork exec sh
                             call  I/O stat clos TCP  inst hndl proc proc proc
--------- ------------- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
3.11-rc7- Linux 3.11.0-   80 4.66 8.88 69.7 112. 268. 8.60 28.0 3489 13.K 27.K	# Non alias ARC700
3.11-rc7- Linux 3.11.0-   80 4.64 8.51 68.6 98.5 271. 8.58 28.1 4160 15.K 32.K	# Aliasing
3.11-rc7- Linux 3.11.0-   80 4.64 8.51 69.8 99.4 270. 8.73 27.5 3880 15.K 31.K	# PTAG loop Inv

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/mm/cache_arc700.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index a152f3263ac0..6b58c1de7577 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -250,6 +250,7 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
 {
 	unsigned int aux_cmd, aux_tag;
 	int num_lines;
+	const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE;
 
 	if (cacheop == OP_INV_IC) {
 		aux_cmd = ARC_REG_IC_IVIL;
@@ -267,7 +268,7 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
 	 *  -@paddr will be cache-line aligned already (being page aligned)
 	 *  -@sz will be integral multiple of line size (being page sized).
 	 */
-	if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
+	if (!full_page_op) {
 		sz += paddr & ~CACHE_LINE_MASK;
 		paddr &= CACHE_LINE_MASK;
 		vaddr &= CACHE_LINE_MASK;
@@ -278,19 +279,26 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
 #if (CONFIG_ARC_MMU_VER <= 2)
 	/* MMUv2 and before: paddr contains stuffed vaddrs bits */
 	paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
+#else
+	/* if V-P const for loop, PTAG can be written once outside loop */
+	if (full_page_op)
+		write_aux_reg(ARC_REG_DC_PTAG, paddr);
 #endif
 
 	while (num_lines-- > 0) {
 #if (CONFIG_ARC_MMU_VER > 2)
 		/* MMUv3, cache ops require paddr seperately */
-		write_aux_reg(ARC_REG_DC_PTAG, paddr);
+		if (!full_page_op) {
+			write_aux_reg(aux_tag, paddr);
+			paddr += L1_CACHE_BYTES;
+		}
 
 		write_aux_reg(aux_cmd, vaddr);
 		vaddr += L1_CACHE_BYTES;
 #else
 		write_aux_reg(aux, paddr);
-#endif
 		paddr += L1_CACHE_BYTES;
+#endif
 	}
 }
 
-- 
cgit v1.2.3


From 21a63b56044706aa37637315dd27d9d465bbd5c4 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Wed, 18 Sep 2013 16:25:40 +0530
Subject: ARC: Change calling convention of do_page_fault()

switch the args (address, pt_regs) to match with all the other "C"
exception handlers.

This removes the awkwardness in EV_ProtV for page fault vs. unaligned
access.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/entry.S | 9 ++++-----
 arch/arc/mm/fault.c     | 2 +-
 arch/arc/mm/tlbex.S     | 4 ++--
 3 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index b908dde8a331..eb3f3c5a700a 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -337,9 +337,9 @@ ARC_ENTRY EV_TLBProtV
 	;  vineetg: Mar 6th: Random Seg Fault issue #1
 	;  ecr and efa were not saved in case an Intr sneaks in
 	;  after fake rtie
-	;
+
 	lr  r2, [ecr]
-	lr  r1, [efa]	; Faulting Data address
+	lr  r0, [efa]	; Faulting Data address
 
 	; --------(4) Return from CPU Exception Mode ---------
 	;  Fake a rtie, but rtie to next label
@@ -348,6 +348,8 @@ ARC_ENTRY EV_TLBProtV
 
 	FAKE_RET_FROM_EXCPN r9
 
+	mov   r1, sp
+
 	;------ (5) Type of Protection Violation? ----------
 	;
 	; ProtV Hardware Exception is triggered for Access Faults of 2 types
@@ -358,14 +360,11 @@ ARC_ENTRY EV_TLBProtV
 	bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
 
 	;========= (6a) Access Violation Processing ========
-	mov r0, sp              ; pt_regs
 	bl  do_page_fault
 	b   ret_from_exception
 
 	;========== (6b) Non aligned access ============
 4:
-	mov r0, r1
-	mov r1, sp              ; pt_regs
 
 #ifdef  CONFIG_ARC_MISALIGN_ACCESS
 	SAVE_CALLEE_SAVED_USER
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 0c14d8a52683..9c69552350c4 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -52,7 +52,7 @@ bad_area:
 	return 1;
 }
 
-void do_page_fault(struct pt_regs *regs, unsigned long address)
+void do_page_fault(unsigned long address, struct pt_regs *regs)
 {
 	struct vm_area_struct *vma = NULL;
 	struct task_struct *tsk = current;
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index cf7d7d9ad695..3fcfdb38d242 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -369,8 +369,8 @@ do_slow_path_pf:
 	EXCEPTION_PROLOGUE
 
 	; ------- setup args for Linux Page fault Hanlder ---------
-	mov_s r0, sp
-	lr  r1, [efa]
+	mov_s r1, sp
+	lr    r0, [efa]
 
 	; We don't want exceptions to be disabled while the fault is handled.
 	; Now that we have saved the context we return from exception hence
-- 
cgit v1.2.3


From 07ba69a46c3394fe6675483275337e5a79a959e0 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Wed, 18 Sep 2013 18:08:01 +0530
Subject: ARC: Reduce #ifdef'ery for unaligned access emulation

Emulation not enabled is treated as if the fixup failed, so no need for
special #ifdef checks.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/unaligned.h | 3 ++-
 arch/arc/kernel/entry.S          | 4 ----
 arch/arc/kernel/traps.c          | 3 +--
 3 files changed, 3 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/include/asm/unaligned.h b/arch/arc/include/asm/unaligned.h
index 60702f3751d2..3e5f071bc00c 100644
--- a/arch/arc/include/asm/unaligned.h
+++ b/arch/arc/include/asm/unaligned.h
@@ -22,7 +22,8 @@ static inline int
 misaligned_fixup(unsigned long address, struct pt_regs *regs,
 		 struct callee_regs *cregs)
 {
-	return 0;
+	/* Not fixed */
+	return 1;
 }
 #endif
 
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index eb3f3c5a700a..daa0e7990270 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -366,7 +366,6 @@ ARC_ENTRY EV_TLBProtV
 	;========== (6b) Non aligned access ============
 4:
 
-#ifdef  CONFIG_ARC_MISALIGN_ACCESS
 	SAVE_CALLEE_SAVED_USER
 	mov r2, sp              ; callee_regs
 
@@ -375,9 +374,6 @@ ARC_ENTRY EV_TLBProtV
 	; TBD: optimize - do this only if a callee reg was involved
 	; either a dst of emulated LD/ST or src with address-writeback
 	RESTORE_CALLEE_SAVED_USER
-#else
-	bl  do_misaligned_error
-#endif
 
 	b   ret_from_exception
 
diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c
index e21692d2fdab..3eadfdabc322 100644
--- a/arch/arc/kernel/traps.c
+++ b/arch/arc/kernel/traps.c
@@ -84,19 +84,18 @@ DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", do_memory_error, BUS_ADRERR)
 DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT)
 DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN)
 
-#ifdef CONFIG_ARC_MISALIGN_ACCESS
 /*
  * Entry Point for Misaligned Data access Exception, for emulating in software
  */
 int do_misaligned_access(unsigned long address, struct pt_regs *regs,
 			 struct callee_regs *cregs)
 {
+	/* If emulation not enabled, or failed, kill the task */
 	if (misaligned_fixup(address, regs, cregs) != 0)
 		return do_misaligned_error(address, regs);
 
 	return 0;
 }
-#endif
 
 /*
  * Entry point for miscll errors such as Nested Exceptions
-- 
cgit v1.2.3


From 54c8bff14d604de23d0718eee59c5436a4703fe5 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 12 Sep 2013 13:53:06 +0530
Subject: ARC: Reset the value of Interrupt Priority Register

In case bootloader has changed the priority of one/more IRQ lines

Reported-by: Noam Camus <noamc@ezchip.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/irq.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c
index 5fc92455da36..02684e1a0cf0 100644
--- a/arch/arc/kernel/irq.c
+++ b/arch/arc/kernel/irq.c
@@ -39,10 +39,14 @@ void arc_init_IRQ(void)
 	level_mask |= IS_ENABLED(CONFIG_ARC_IRQ5_LV2) << 5;
 	level_mask |= IS_ENABLED(CONFIG_ARC_IRQ6_LV2) << 6;
 
-	if (level_mask) {
+	/*
+	 * Write to register, even if no LV2 IRQs configured to reset it
+	 * in case bootloader had mucked with it
+	 */
+	write_aux_reg(AUX_IRQ_LEV, level_mask);
+
+	if (level_mask)
 		pr_info("Level-2 interrupts bitset %x\n", level_mask);
-		write_aux_reg(AUX_IRQ_LEV, level_mask);
-	}
 }
 
 /*
-- 
cgit v1.2.3


From 0dafafc3ef42bad34fd446725cb9778c3bdd83a8 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 6 Sep 2013 14:18:17 +0530
Subject: ARC: Add support for irqflags tracing and lockdep

Lockdep required a small fix to stacktrace API which was incorrectly
unwinding out of __switch_to for the current call frame.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/Kconfig                |  6 ++++++
 arch/arc/include/asm/irqflags.h | 22 ++++++++++++++++++++++
 arch/arc/kernel/entry.S         | 11 +++++++++++
 arch/arc/kernel/stacktrace.c    |  4 +++-
 4 files changed, 42 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 91dbb2757afd..524e6fce2c89 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -35,6 +35,12 @@ config ARC
 	select PERF_USE_VMALLOC
 	select HAVE_DEBUG_STACKOVERFLOW
 
+config TRACE_IRQFLAGS_SUPPORT
+	def_bool y
+
+config LOCKDEP_SUPPORT
+	def_bool y
+
 config SCHED_OMIT_FRAME_POINTER
 	def_bool y
 
diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h
index b68b53f458d1..cb7efc29f16f 100644
--- a/arch/arc/include/asm/irqflags.h
+++ b/arch/arc/include/asm/irqflags.h
@@ -151,16 +151,38 @@ static inline void arch_unmask_irq(unsigned int irq)
 
 #else
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+.macro TRACE_ASM_IRQ_DISABLE
+	bl	trace_hardirqs_off
+.endm
+
+.macro TRACE_ASM_IRQ_ENABLE
+	bl	trace_hardirqs_on
+.endm
+
+#else
+
+.macro TRACE_ASM_IRQ_DISABLE
+.endm
+
+.macro TRACE_ASM_IRQ_ENABLE
+.endm
+
+#endif
+
 .macro IRQ_DISABLE  scratch
 	lr	\scratch, [status32]
 	bic	\scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
 	flag	\scratch
+	TRACE_ASM_IRQ_DISABLE
 .endm
 
 .macro IRQ_ENABLE  scratch
 	lr	\scratch, [status32]
 	or	\scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
 	flag	\scratch
+	TRACE_ASM_IRQ_ENABLE
 .endm
 
 #endif	/* __ASSEMBLY__ */
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index daa0e7990270..47d09d07f093 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -250,6 +250,14 @@ ARC_ENTRY handle_interrupt_level1
 	lr  r0, [icause1]
 	and r0, r0, 0x1f
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+	; icause1 needs to be read early, before calling tracing, which
+	; can clobber scratch regs, hence use of stack to stash it
+	push r0
+	TRACE_ASM_IRQ_DISABLE
+	pop  r0
+#endif
+
 	bl.d  @arch_do_IRQ
 	mov r1, sp
 
@@ -570,6 +578,7 @@ resume_user_mode_begin:
 	; --- (Slow Path #2) pending signal  ---
 	mov r0, sp	; pt_regs for arg to do_signal()/do_notify_resume()
 
+	GET_CURR_THR_INFO_FLAGS   r9
 	bbit0  r9, TIF_SIGPENDING, .Lchk_notify_resume
 
 	; Normal Trap/IRQ entry only saves Scratch (caller-saved) regs
@@ -635,6 +644,8 @@ resume_kernel_mode:
 
 restore_regs :
 
+	TRACE_ASM_IRQ_ENABLE
+
 	lr	r10, [status32]
 
 	; Restore REG File. In case multiple Events outstanding,
diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index f8b7d880304d..ab97b034922f 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -237,11 +237,13 @@ unsigned int get_wchan(struct task_struct *tsk)
  */
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
+	/* Assumes @tsk is sleeping so unwinds from __switch_to */
 	arc_unwind_core(tsk, NULL, __collect_all_but_sched, trace);
 }
 
 void save_stack_trace(struct stack_trace *trace)
 {
-	arc_unwind_core(current, NULL, __collect_all, trace);
+	/* Pass NULL for task so it unwinds the current call frame */
+	arc_unwind_core(NULL, NULL, __collect_all, trace);
 }
 #endif
-- 
cgit v1.2.3


From 0a4c40a3b702730c8b1ad0952e6501e84fadd395 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 27 Sep 2013 18:20:06 +0530
Subject: ARC: Fix bogus gcc warning and micro-optimise TLB iteration loop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

------------------>8----------------------
arch/arc/mm/tlb.c: In function ‘do_tlb_overlap_fault’:
arch/arc/mm/tlb.c:688:13: warning: array subscript is above array bounds
[-Warray-bounds]
         (pd0[n] & PAGE_MASK)) {
             ^
------------------>8----------------------

While at it, remove the useless last iteration of outer loop when reading
a TLB SET for duplicate entries.

Suggested-by: Mischa Jonker <mjonker@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/mm/tlb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 71cb26df4255..5f53050abf60 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -610,9 +610,9 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
 			  struct pt_regs *regs)
 {
 	int set, way, n;
-	unsigned int pd0[4], pd1[4];	/* assume max 4 ways */
 	unsigned long flags, is_valid;
 	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+	unsigned int pd0[mmu->ways], pd1[mmu->ways];
 
 	local_irq_save(flags);
 
@@ -637,7 +637,7 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
 			continue;
 
 		/* Scan the set for duplicate ways: needs a nested loop */
-		for (way = 0; way < mmu->ways; way++) {
+		for (way = 0; way < mmu->ways - 1; way++) {
 			if (!pd0[way])
 				continue;
 
-- 
cgit v1.2.3


From 3d01c1ce41fae0fb1d8e50589f9fb3b1fa376c1c Mon Sep 17 00:00:00 2001
From: Chen Gang <gang.chen@asianux.com>
Date: Thu, 24 Oct 2013 11:50:09 +0800
Subject: arc: kgdb: add default implementation for kgdb_roundup_cpus()

arc supports kgdb, but needs an update: add the function kgdb_roundup_cpus(),
otherwise the build fails. For now, add the simple generic implementation,
just like other architectures (e.g. tile, mips ...).

The related error (with allmodconfig):

  kernel/built-in.o: In function `kgdb_cpu_enter':
  kernel/debug/debug_core.c:580: undefined reference to `kgdb_roundup_cpus'

Signed-off-by: Chen Gang <gang.chen@asianux.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/kgdb.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'arch')

diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index a7698fb14818..a2ff5c5d1450 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -196,6 +196,18 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
 	instruction_pointer(regs) = ip;
 }
 
+static void kgdb_call_nmi_hook(void *ignored)
+{
+	kgdb_nmicallback(raw_smp_processor_id(), NULL);
+}
+
+void kgdb_roundup_cpus(unsigned long flags)
+{
+	local_irq_enable();
+	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
+	local_irq_disable();
+}
+
 struct kgdb_arch arch_kgdb_ops = {
 	/* breakpoint instruction: TRAP_S 0x3 */
 #ifdef CONFIG_CPU_BIG_ENDIAN
-- 
cgit v1.2.3


From ef3a661af69046df74beb0ddfa27204aad316385 Mon Sep 17 00:00:00 2001
From: Chen Gang <gang.chen@asianux.com>
Date: Wed, 23 Oct 2013 10:12:05 +0800
Subject: arc: remove '__init' for setup_processor() and arc_init_IRQ()

Their definitions do not have '__init', but their declarations do.
Also, the normal (non-__init) function start_kernel_secondary() may call
setup_processor(), which in turn calls arc_init_IRQ().

So '__init' needs to be removed from both of them. The related warning (with
allmodconfig):

    MODPOST vmlinux.o
  WARNING: vmlinux.o(.text+0x3084): Section mismatch in reference from the function start_kernel_secondary() to the function .init.text:setup_processor()
  The function start_kernel_secondary() references
  the function __init setup_processor().
  This is often because start_kernel_secondary lacks a __init
  annotation or the annotation of setup_processor is wrong.

Signed-off-by: Chen Gang <gang.chen@asianux.com>
---
 arch/arc/include/asm/irq.h   | 2 +-
 arch/arc/include/asm/setup.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h
index c0a72105ee0b..548207f073da 100644
--- a/arch/arc/include/asm/irq.h
+++ b/arch/arc/include/asm/irq.h
@@ -18,7 +18,7 @@
 
 #include <asm-generic/irq.h>
 
-extern void __init arc_init_IRQ(void);
+extern void arc_init_IRQ(void);
 extern int __init get_hw_config_num_irq(void);
 
 void arc_local_timer_setup(unsigned int cpu);
diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h
index 229e50681497..e10f8cef56a8 100644
--- a/arch/arc/include/asm/setup.h
+++ b/arch/arc/include/asm/setup.h
@@ -31,7 +31,7 @@ struct cpuinfo_data {
 extern int root_mountflags, end_mem;
 extern int running_on_hw;
 
-void __init setup_processor(void);
+void setup_processor(void);
 void __init setup_arch_memory(void);
 
 #endif /* __ASMARC_SETUP_H */
-- 
cgit v1.2.3


From 8f5d221b0641a9a16311a6e5c056a5b583ef0cbb Mon Sep 17 00:00:00 2001
From: Chen Gang <gang.chen@asianux.com>
Date: Wed, 23 Oct 2013 10:16:38 +0800
Subject: arc: remove '__init' for first_lines_of_secondary()

first_lines_of_secondary() is an '__init' function, but it may be called
by __cpu_up(), which is called by _cpu_up(), which is called by cpu_up(),
a normal exported function. So '__init' should be removed.

The related warning (with allmodconfig):

    MODPOST vmlinux.o
  WARNING: vmlinux.o(.text+0x315c): Section mismatch in reference from the function __cpu_up() to the function .init.text:first_lines_of_secondary()
  The function __cpu_up() references
  the function __init first_lines_of_secondary().
  This is often because __cpu_up lacks a __init
  annotation or the annotation of first_lines_of_secondary is wrong.

Signed-off-by: Chen Gang <gang.chen@asianux.com>
---
 arch/arc/include/asm/smp.h | 2 +-
 arch/arc/kernel/head.S     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h
index c4fb211dcd25..eefc29f08cdb 100644
--- a/arch/arc/include/asm/smp.h
+++ b/arch/arc/include/asm/smp.h
@@ -30,7 +30,7 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
  * APIs provided by arch SMP code to rest of arch code
  */
 extern void __init smp_init_cpus(void);
-extern void __init first_lines_of_secondary(void);
+extern void first_lines_of_secondary(void);
 extern const char *arc_platform_smp_cpuinfo(void);
 
 /*
diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S
index 0f944f024513..2c878e964a64 100644
--- a/arch/arc/kernel/head.S
+++ b/arch/arc/kernel/head.S
@@ -95,7 +95,7 @@ stext:
 ;----------------------------------------------------------------
 ;     First lines of code run by secondary before jumping to 'C'
 ;----------------------------------------------------------------
-	.section .init.text, "ax",@progbits
+	.section .text, "ax",@progbits
 	.type first_lines_of_secondary, @function
 	.globl first_lines_of_secondary
 
-- 
cgit v1.2.3


From 4782f7f9aeba4dfd383c532e96bebf83ce0b9748 Mon Sep 17 00:00:00 2001
From: Chen Gang <gang.chen@asianux.com>
Date: Wed, 23 Oct 2013 11:02:51 +0800
Subject: arc: remove '__init' for get_hw_config_num_irq()

get_hw_config_num_irq() may be called by the normal (non-__init) function
iss_model_init_smp(), which is used as the 'init_smp' function pointer; that
pointer may be called by first_lines_of_secondary(), which also needs to be
a normal function.

The related warning (with allmodconfig):

    MODPOST vmlinux.o
  WARNING: vmlinux.o(.text+0x5814): Section mismatch in reference from the function iss_model_init_smp() to the function .init.text:get_hw_config_num_irq()
  The function iss_model_init_smp() references
  the function __init get_hw_config_num_irq().
  This is often because iss_model_init_smp lacks a __init
  annotation or the annotation of get_hw_config_num_irq is wrong.

Signed-off-by: Chen Gang <gang.chen@asianux.com>
---
 arch/arc/include/asm/irq.h | 2 +-
 arch/arc/kernel/irq.c      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h
index 548207f073da..291a70db68b8 100644
--- a/arch/arc/include/asm/irq.h
+++ b/arch/arc/include/asm/irq.h
@@ -19,7 +19,7 @@
 #include <asm-generic/irq.h>
 
 extern void arc_init_IRQ(void);
-extern int __init get_hw_config_num_irq(void);
+extern int get_hw_config_num_irq(void);
 
 void arc_local_timer_setup(unsigned int cpu);
 
diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c
index 02684e1a0cf0..a4b141ee9a6a 100644
--- a/arch/arc/kernel/irq.c
+++ b/arch/arc/kernel/irq.c
@@ -150,7 +150,7 @@ void arch_do_IRQ(unsigned int irq, struct pt_regs *regs)
 	set_irq_regs(old_regs);
 }
 
-int __init get_hw_config_num_irq(void)
+int get_hw_config_num_irq(void)
 {
 	uint32_t val = read_aux_reg(ARC_REG_VECBASE_BCR);
 
-- 
cgit v1.2.3


From 8f146d0204a874b6b1f5344ed578796e2fcd7e9c Mon Sep 17 00:00:00 2001
From: Chen Gang <gang.chen@asianux.com>
Date: Mon, 28 Oct 2013 11:00:38 +0800
Subject: arc: export symbol for save_stack_trace() in stacktrace.c

The symbol needs to be exported, just as other architectures do; otherwise
the build fails with allmodconfig. The related error:

    MODPOST 2994 modules
  ERROR: "save_stack_trace" [kernel/backtracetest.ko] undefined!
  ERROR: "save_stack_trace" [drivers/md/persistent-data/dm-persistent-data.ko] undefined!

Signed-off-by: Chen Gang <gang.chen@asianux.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/stacktrace.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index ab97b034922f..9ce47cfe2303 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -246,4 +246,5 @@ void save_stack_trace(struct stack_trace *trace)
 	/* Pass NULL for task so it unwinds the current call frame */
 	arc_unwind_core(NULL, NULL, __collect_all, trace);
 }
+EXPORT_SYMBOL_GPL(save_stack_trace);
 #endif
-- 
cgit v1.2.3


From b6fe8e7c0189d017fdba90d1cd134337098c19c3 Mon Sep 17 00:00:00 2001
From: Chen Gang <gang.chen@asianux.com>
Date: Mon, 28 Oct 2013 11:49:47 +0800
Subject: arc: export symbol for pm_power_off in reset.c

The symbol needs to be exported; otherwise the build fails. The related error
with allmodconfig:

    MODPOST 2994 modules
  ERROR: "pm_power_off" [drivers/mfd/retu-mfd.ko] undefined!
  ERROR: "pm_power_off" [drivers/char/ipmi/ipmi_poweroff.ko] undefined!

Signed-off-by: Chen Gang <gang.chen@asianux.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/reset.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arc/kernel/reset.c b/arch/arc/kernel/reset.c
index e227a2b1c943..2768fa1e39b9 100644
--- a/arch/arc/kernel/reset.c
+++ b/arch/arc/kernel/reset.c
@@ -31,3 +31,4 @@ void machine_power_off(void)
 }
 
 void (*pm_power_off) (void) = NULL;
+EXPORT_SYMBOL(pm_power_off);
-- 
cgit v1.2.3


From 63eca94ca206e342bad4a06a86d8e7eda3053a4e Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 23 Aug 2013 19:16:34 +0530
Subject: ARC: [SMP] ASID allocation

-Track a Per CPU ASID counter
-mm-per-cpu ASID (multiple threads, or mm migrated around)

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/mmu.h         |  2 +-
 arch/arc/include/asm/mmu_context.h | 44 ++++++++++++++++++++++++--------------
 arch/arc/mm/tlb.c                  | 14 ++++++------
 3 files changed, 37 insertions(+), 23 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index c2663b32866b..8c84ae98c337 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -48,7 +48,7 @@
 #ifndef __ASSEMBLY__
 
 typedef struct {
-	unsigned long asid;	/* 8 bit MMU PID + Generation cycle */
+	unsigned long asid[NR_CPUS];	/* 8 bit MMU PID + Generation cycle */
 } mm_context_t;
 
 #ifdef CONFIG_ARC_DBG_TLB_PARANOIA
diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h
index 43a1b51bb8cc..45f06f566b02 100644
--- a/arch/arc/include/asm/mmu_context.h
+++ b/arch/arc/include/asm/mmu_context.h
@@ -30,13 +30,13 @@
  * "Fast Context Switch" i.e. no TLB flush on ctxt-switch
  *
  * Linux assigns each task a unique ASID. A simple round-robin allocation
- * of H/w ASID is done using software tracker @asid_cache.
+ * of H/w ASID is done using software tracker @asid_cpu.
  * When it reaches max 255, the allocation cycle starts afresh by flushing
  * the entire TLB and wrapping ASID back to zero.
  *
  * A new allocation cycle, post rollover, could potentially reassign an ASID
  * to a different task. Thus the rule is to refresh the ASID in a new cycle.
- * The 32 bit @asid_cache (and mm->asid) have 8 bits MMU PID and rest 24 bits
+ * The 32 bit @asid_cpu (and mm->asid) have 8 bits MMU PID and rest 24 bits
  * serve as cycle/generation indicator and natural 32 bit unsigned math
  * automagically increments the generation when lower 8 bits rollover.
  */
@@ -47,9 +47,11 @@
 #define MM_CTXT_FIRST_CYCLE	(MM_CTXT_ASID_MASK + 1)
 #define MM_CTXT_NO_ASID		0UL
 
-#define hw_pid(mm)		(mm->context.asid & MM_CTXT_ASID_MASK)
+#define asid_mm(mm, cpu)	mm->context.asid[cpu]
+#define hw_pid(mm, cpu)		(asid_mm(mm, cpu) & MM_CTXT_ASID_MASK)
 
-extern unsigned int asid_cache;
+DECLARE_PER_CPU(unsigned int, asid_cache);
+#define asid_cpu(cpu)		per_cpu(asid_cache, cpu)
 
 /*
  * Get a new ASID if task doesn't have a valid one (unalloc or from prev cycle)
@@ -57,6 +59,7 @@ extern unsigned int asid_cache;
  */
 static inline void get_new_mmu_context(struct mm_struct *mm)
 {
+	const unsigned int cpu = smp_processor_id();
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -71,11 +74,11 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
 	 * 	 first need to destroy the context, setting it to invalid
 	 * 	 value.
 	 */
-	if (!((mm->context.asid ^ asid_cache) & MM_CTXT_CYCLE_MASK))
+	if (!((asid_mm(mm, cpu) ^ asid_cpu(cpu)) & MM_CTXT_CYCLE_MASK))
 		goto set_hw;
 
 	/* move to new ASID and handle rollover */
-	if (unlikely(!(++asid_cache & MM_CTXT_ASID_MASK))) {
+	if (unlikely(!(++asid_cpu(cpu) & MM_CTXT_ASID_MASK))) {
 
 		flush_tlb_all();
 
@@ -84,15 +87,15 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
 		 * If the container itself wrapped around, set it to a non zero
 		 * "generation" to distinguish from no context
 		 */
-		if (!asid_cache)
-			asid_cache = MM_CTXT_FIRST_CYCLE;
+		if (!asid_cpu(cpu))
+			asid_cpu(cpu) = MM_CTXT_FIRST_CYCLE;
 	}
 
 	/* Assign new ASID to tsk */
-	mm->context.asid = asid_cache;
+	asid_mm(mm, cpu) = asid_cpu(cpu);
 
 set_hw:
-	write_aux_reg(ARC_REG_PID, hw_pid(mm) | MMU_ENABLE);
+	write_aux_reg(ARC_REG_PID, hw_pid(mm, cpu) | MMU_ENABLE);
 
 	local_irq_restore(flags);
 }
@@ -104,10 +107,24 @@ set_hw:
 static inline int
 init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
-	mm->context.asid = MM_CTXT_NO_ASID;
+	int i;
+
+	for_each_possible_cpu(i)
+		asid_mm(mm, i) = MM_CTXT_NO_ASID;
+
 	return 0;
 }
 
+static inline void destroy_context(struct mm_struct *mm)
+{
+	unsigned long flags;
+
+	/* Needed to elide CONFIG_DEBUG_PREEMPT warning */
+	local_irq_save(flags);
+	asid_mm(mm, smp_processor_id()) = MM_CTXT_NO_ASID;
+	local_irq_restore(flags);
+}
+
 /* Prepare the MMU for task: setup PID reg with allocated ASID
     If task doesn't have an ASID (never alloc or stolen, get a new ASID)
 */
@@ -131,11 +148,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
  */
 #define activate_mm(prev, next)		switch_mm(prev, next, NULL)
 
-static inline void destroy_context(struct mm_struct *mm)
-{
-	mm->context.asid = MM_CTXT_NO_ASID;
-}
-
 /* it seemed that deactivate_mm( ) is a reasonable place to do book-keeping
  * for retiring-mm. However destroy_context( ) still needs to do that because
  * between mm_release( ) = >deactive_mm( ) and
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 5f53050abf60..db0f0f823980 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -100,7 +100,7 @@
 
 
 /* A copy of the ASID from the PID reg is kept in asid_cache */
-unsigned int asid_cache = MM_CTXT_FIRST_CYCLE;
+DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
 
 /*
  * Utility Routine to erase a J-TLB entry
@@ -274,6 +274,7 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm)
 void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 			   unsigned long end)
 {
+	const unsigned int cpu = smp_processor_id();
 	unsigned long flags;
 
 	/* If range @start to @end is more than 32 TLB entries deep,
@@ -297,9 +298,9 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 
 	local_irq_save(flags);
 
-	if (vma->vm_mm->context.asid != MM_CTXT_NO_ASID) {
+	if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) {
 		while (start < end) {
-			tlb_entry_erase(start | hw_pid(vma->vm_mm));
+			tlb_entry_erase(start | hw_pid(vma->vm_mm, cpu));
 			start += PAGE_SIZE;
 		}
 	}
@@ -346,6 +347,7 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
 
 void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 {
+	const unsigned int cpu = smp_processor_id();
 	unsigned long flags;
 
 	/* Note that it is critical that interrupts are DISABLED between
@@ -353,8 +355,8 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 	 */
 	local_irq_save(flags);
 
-	if (vma->vm_mm->context.asid != MM_CTXT_NO_ASID) {
-		tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm));
+	if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) {
+		tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm, cpu));
 		utlb_invalidate();
 	}
 
@@ -400,7 +402,7 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 
 	local_irq_save(flags);
 
-	tlb_paranoid_check(vma->vm_mm->context.asid, address);
+	tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), address);
 
 	address &= PAGE_MASK;
 
-- 
cgit v1.2.3


From 5ea72a90261552ed5fdca35239feb6cba498301e Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Sun, 27 Oct 2013 14:49:02 +0530
Subject: ARC: [SMP] TLB flush

- Add mm_cpumask setting (aggregating only, unlike some other arches)
  used to restrict the TLB flush cross-calling

- cross-calling versions of TLB flush routines (thanks to Noam)

Signed-off-by: Noam Camus <noamc@ezchip.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/mmu_context.h | 17 ++++++++-
 arch/arc/include/asm/tlbflush.h    | 11 ++++--
 arch/arc/kernel/smp.c              |  1 +
 arch/arc/mm/tlb.c                  | 73 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 99 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h
index 45f06f566b02..1fd467ef658f 100644
--- a/arch/arc/include/asm/mmu_context.h
+++ b/arch/arc/include/asm/mmu_context.h
@@ -80,7 +80,7 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
 	/* move to new ASID and handle rollover */
 	if (unlikely(!(++asid_cpu(cpu) & MM_CTXT_ASID_MASK))) {
 
-		flush_tlb_all();
+		local_flush_tlb_all();
 
 		/*
 		 * Above checke for rollover of 8 bit ASID in 32 bit container.
@@ -131,6 +131,21 @@ static inline void destroy_context(struct mm_struct *mm)
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			     struct task_struct *tsk)
 {
+	const int cpu = smp_processor_id();
+
+	/*
+	 * Note that the mm_cpumask is "aggregating" only, we don't clear it
+	 * for the switched-out task, unlike some other arches.
+	 * It is used to enlist cpus for sending TLB flush IPIs and not sending
+	 * it to CPUs where a task once ran-on, could cause stale TLB entry
+	 * re-use, specially for a multi-threaded task.
+	 * e.g. T1 runs on C1, migrates to C3. T2 running on C2 munmaps.
+	 *      For a non-aggregating mm_cpumask, IPI not sent C1, and if T1
+	 *      were to re-migrate to C1, it could access the unmapped region
+	 *      via any existing stale TLB entries.
+	 */
+	cpumask_set_cpu(cpu, mm_cpumask(next));
+
 #ifndef CONFIG_SMP
 	/* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
 	write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
diff --git a/arch/arc/include/asm/tlbflush.h b/arch/arc/include/asm/tlbflush.h
index b2f9bc7f68c8..71c7b2e4b874 100644
--- a/arch/arc/include/asm/tlbflush.h
+++ b/arch/arc/include/asm/tlbflush.h
@@ -18,11 +18,18 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end);
 void local_flush_tlb_range(struct vm_area_struct *vma,
 			   unsigned long start, unsigned long end);
 
-/* XXX: Revisit for SMP */
+#ifndef CONFIG_SMP
 #define flush_tlb_range(vma, s, e)	local_flush_tlb_range(vma, s, e)
 #define flush_tlb_page(vma, page)	local_flush_tlb_page(vma, page)
 #define flush_tlb_kernel_range(s, e)	local_flush_tlb_kernel_range(s, e)
 #define flush_tlb_all()			local_flush_tlb_all()
 #define flush_tlb_mm(mm)		local_flush_tlb_mm(mm)
-
+#else
+extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+							 unsigned long end);
+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
+extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+extern void flush_tlb_all(void);
+extern void flush_tlb_mm(struct mm_struct *mm);
+#endif /* CONFIG_SMP */
 #endif
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 7f0ab1ecd640..41bc4c703f42 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -128,6 +128,7 @@ void start_kernel_secondary(void)
 	atomic_inc(&mm->mm_users);
 	atomic_inc(&mm->mm_count);
 	current->active_mm = mm;
+	cpumask_set_cpu(cpu, mm_cpumask(mm));
 
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index db0f0f823980..e1acf0ce5647 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -363,6 +363,79 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 	local_irq_restore(flags);
 }
 
+#ifdef CONFIG_SMP
+
+struct tlb_args {
+	struct vm_area_struct *ta_vma;
+	unsigned long ta_start;
+	unsigned long ta_end;
+};
+
+static inline void ipi_flush_tlb_page(void *arg)
+{
+	struct tlb_args *ta = arg;
+
+	local_flush_tlb_page(ta->ta_vma, ta->ta_start);
+}
+
+static inline void ipi_flush_tlb_range(void *arg)
+{
+	struct tlb_args *ta = arg;
+
+	local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
+}
+
+static inline void ipi_flush_tlb_kernel_range(void *arg)
+{
+	struct tlb_args *ta = (struct tlb_args *)arg;
+
+	local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
+}
+
+void flush_tlb_all(void)
+{
+	on_each_cpu((smp_call_func_t)local_flush_tlb_all, NULL, 1);
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	on_each_cpu_mask(mm_cpumask(mm), (smp_call_func_t)local_flush_tlb_mm,
+			 mm, 1);
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
+{
+	struct tlb_args ta = {
+		.ta_vma = vma,
+		.ta_start = uaddr
+	};
+
+	on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page, &ta, 1);
+}
+
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		     unsigned long end)
+{
+	struct tlb_args ta = {
+		.ta_vma = vma,
+		.ta_start = start,
+		.ta_end = end
+	};
+
+	on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1);
+}
+
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	struct tlb_args ta = {
+		.ta_start = start,
+		.ta_end = end
+	};
+
+	on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
+}
+#endif
+
 /*
  * Routine to create a TLB entry
  */
-- 
cgit v1.2.3


From 3aa4f80e410b3c14d987c42a90c31023c3081b46 Mon Sep 17 00:00:00 2001
From: Noam Camus <noamc@ezchip.com>
Date: Mon, 3 Jun 2013 15:19:59 +0300
Subject: ARC: [SMP] enlarge possible NR_CPUS

Signed-off-by: Noam Camus <noamc@ezchip.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 524e6fce2c89..fb4177e48260 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -145,8 +145,8 @@ config ARC_HAS_REENTRANT_IRQ_LV2
 endif
 
 config NR_CPUS
-	int "Maximum number of CPUs (2-32)"
-	range 2 32
+	int "Maximum number of CPUs (2-4096)"
+	range 2 4096
 	depends on SMP
 	default "2"
 
-- 
cgit v1.2.3


From 57e26e57454fae4f1d15c2e9fa965b7a8046ab34 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 1 Nov 2013 10:46:40 +0530
Subject: ARC: [SMP] Fix build failures for large NR_CPUS

ST.as only takes S9 (255) for offset. This was going out of range when
accessing a task_struct field with 4k NR_CPUS (due to 128b of cpumask
itself in there).

Workaround by using an intermediate register to do the address scaling.

There is some duplication of fix for ctx_sw.c and ctx_sw_asm.S however
given that C version will go away soon I'm not bothering to factor out
the common code.

Reported-by: Noam Camus <noamc@ezchip.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/ctx_sw.c     | 13 ++++++++++++-
 arch/arc/kernel/ctx_sw_asm.S | 11 +++++++++--
 2 files changed, 21 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c
index 34410eb1a308..c14a5bea0c76 100644
--- a/arch/arc/kernel/ctx_sw.c
+++ b/arch/arc/kernel/ctx_sw.c
@@ -17,6 +17,8 @@
 #include <asm/asm-offsets.h>
 #include <linux/sched.h>
 
+#define KSP_WORD_OFF 	((TASK_THREAD + THREAD_KSP) / 4)
+
 struct task_struct *__sched
 __switch_to(struct task_struct *prev_task, struct task_struct *next_task)
 {
@@ -45,7 +47,16 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task)
 #endif
 
 		/* set ksp of outgoing task in tsk->thread.ksp */
+#if KSP_WORD_OFF <= 255
 		"st.as   sp, [%3, %1]    \n\t"
+#else
+		/*
+		 * Workaround for NR_CPUS=4k
+		 * %1 is bigger than 255 (S9 offset for st.as)
+		 */
+		"add2    r24, %3, %1     \n\t"
+		"st      sp, [r24]       \n\t"
+#endif
 
 		"sync   \n\t"
 
@@ -97,7 +108,7 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task)
 		/* FP/BLINK restore generated by gcc (standard func epilogue */
 
 		: "=r"(tmp)
-		: "n"((TASK_THREAD + THREAD_KSP) / 4), "r"(next), "r"(prev)
+		: "n"(KSP_WORD_OFF), "r"(next), "r"(prev)
 		: "blink"
 	);
 
diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S
index d8972345e4c2..65690e7fcc8c 100644
--- a/arch/arc/kernel/ctx_sw_asm.S
+++ b/arch/arc/kernel/ctx_sw_asm.S
@@ -14,6 +14,8 @@
 #include <asm/asm-offsets.h>
 #include <asm/linkage.h>
 
+#define KSP_WORD_OFF 	((TASK_THREAD + THREAD_KSP) / 4)
+
 ;################### Low Level Context Switch ##########################
 
 	.section .sched.text,"ax",@progbits
@@ -28,8 +30,13 @@ __switch_to:
 	SAVE_CALLEE_SAVED_KERNEL
 
 	/* Save the now KSP in task->thread.ksp */
-	st.as  sp, [r0, (TASK_THREAD + THREAD_KSP)/4]
-
+#if KSP_WORD_OFF  <= 255
+	st.as  sp, [r0, KSP_WORD_OFF]
+#else
+	/* Workaround for NR_CPUS=4k as ST.as can only take s9 offset */
+	add2	r24, r0, KSP_WORD_OFF
+	st	sp, [r24]
+#endif
 	/*
 	* Return last task in r0 (return reg)
 	* On ARC, Return reg = First Arg reg = r0.
-- 
cgit v1.2.3


From 7d0857a54aedbd47b3de503933d65ce462970bd6 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Mon, 9 Sep 2013 16:04:15 +0530
Subject: ARC: [SMP] Disallow RTSC

RTSC is strictly incore and must not be allowed in SMP configs

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/Kconfig       | 6 +-----
 arch/arc/kernel/time.c | 7 ++++---
 2 files changed, 5 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index fb4177e48260..5ede5460c806 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -136,9 +136,6 @@ if SMP
 config ARC_HAS_COH_CACHES
 	def_bool n
 
-config ARC_HAS_COH_RTSC
-	def_bool n
-
 config ARC_HAS_REENTRANT_IRQ_LV2
 	def_bool n
 
@@ -332,8 +329,7 @@ config ARC_HAS_RTSC
 	bool "Insn: RTSC (64-bit r/o cycle counter)"
 	default y
 	depends on ARC_CPU_REL_4_10
-	# if SMP, enable RTSC only if counter is coherent across cores
-	depends on !SMP || ARC_HAS_COH_RTSC
+	depends on !SMP
 
 endmenu   # "ARC CPU Configuration"
 
diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c
index 0a9b6b289c4f..e5f3a837fb35 100644
--- a/arch/arc/kernel/time.c
+++ b/arch/arc/kernel/time.c
@@ -63,9 +63,10 @@
 
 int arc_counter_setup(void)
 {
-	/* RTSC insn taps into cpu clk, needs no setup */
-
-	/* For SMP, only allowed if cross-core-sync, hence usable as cs */
+	/*
+	 * For SMP this needs to be 0. However Kconfig glue doesn't
+	 * enable this option for SMP configs
+	 */
 	return 1;
 }
 
-- 
cgit v1.2.3


From 06b93c347c93d53f5cd76c3efa277ea114cb2000 Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@huawei.com>
Date: Thu, 12 Sep 2013 00:07:07 +0800
Subject: smp, ARC: kill SMP single function call interrupt

Commit 9a46ad6d6df3b54 "smp: make smp_call_function_many() use logic
similar to smp_call_function_single()" has unified the way to handle
single and multiple cross-CPU function calls. Now only one interrupt
is needed for architecture specific code to support generic SMP function
call interfaces, so kill the redundant single function call interrupt.

Signed-off-by: Jiang Liu <jiang.liu@huawei.com>
Cc: Jiang Liu <liuj97@gmail.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/smp.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 41bc4c703f42..c2f9ebbc38f6 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -211,7 +211,6 @@ enum ipi_msg_type {
 	IPI_NOP = 0,
 	IPI_RESCHEDULE = 1,
 	IPI_CALL_FUNC,
-	IPI_CALL_FUNC_SINGLE,
 	IPI_CPU_STOP
 };
 
@@ -255,7 +254,7 @@ void smp_send_stop(void)
 
 void arch_send_call_function_single_ipi(int cpu)
 {
-	ipi_send_msg(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
+	ipi_send_msg(cpumask_of(cpu), IPI_CALL_FUNC);
 }
 
 void arch_send_call_function_ipi_mask(const struct cpumask *mask)
@@ -287,10 +286,6 @@ static inline void __do_IPI(unsigned long *ops, struct ipi_data *ipi, int cpu)
 			generic_smp_call_function_interrupt();
 			break;
 
-		case IPI_CALL_FUNC_SINGLE:
-			generic_smp_call_function_single_interrupt();
-			break;
-
 		case IPI_CPU_STOP:
 			ipi_cpu_stop(cpu);
 			break;
-- 
cgit v1.2.3


From 737d5b980be82f722153d8104f7949e4204c5911 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 1 Nov 2013 19:54:19 +0530
Subject: ARC: [plat-arcfpga] defconfig update

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/configs/fpga_defconfig | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/arc/configs/fpga_defconfig b/arch/arc/configs/fpga_defconfig
index 4ca50f1f8d05..e283aa586934 100644
--- a/arch/arc/configs/fpga_defconfig
+++ b/arch/arc/configs/fpga_defconfig
@@ -2,6 +2,8 @@ CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
@@ -62,4 +64,5 @@ CONFIG_TMPFS=y
 CONFIG_NFS_FS=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
 # CONFIG_ENABLE_MUST_CHECK is not set
+# CONFIG_DEBUG_PREEMPT is not set
 CONFIG_XZ_DEC=y
-- 
cgit v1.2.3