From 158e8bfe802f730f9ea7cde32eee8b43285bdd4a Mon Sep 17 00:00:00 2001
From: Alessandro Rubini <rubini@gnudd.com>
Date: Sun, 24 Jun 2012 12:46:26 +0100
Subject: ARM: 7432/1: use the new linux/sizes.h

Signed-off-by: Alessandro Rubini <rubini@gnudd.com>
Acked-by: Giancarlo Asnaghi <giancarlo.asnaghi@st.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Alan Cox <alan@linux.intel.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/memory.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index fcb575747e5e..e965f1b560f1 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -16,7 +16,7 @@
 #include <linux/compiler.h>
 #include <linux/const.h>
 #include <linux/types.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #ifdef CONFIG_NEED_MACH_MEMORY_H
 #include <mach/memory.h>
-- 
cgit 


From bc89663aa5c7ca620f58c34ab531ca409119becc Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Thu, 28 Jun 2012 14:42:08 +0800
Subject: ARM: fiq: change FIQ_START to a variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The commit a2be01b (ARM: only include mach/irqs.h for !SPARSE_IRQ)
makes mach/irqs.h only be included for !SPARSE_IRQ build.  There are
a nubmer of platforms have FIQ_START defined in mach/irqs.h for FIQ
support.

  arch/arm/mach-rpc/include/mach/irqs.h:#define FIQ_START         64
  arch/arm/mach-s3c24xx/include/mach/irqs.h:#define FIQ_START             IRQ_EINT0
  arch/arm/plat-mxc/include/mach/irqs.h:#define FIQ_START 0

If SPARSE_IRQ is enabled for any of these platforms, the following
compile error will be seen.

  arch/arm/kernel/fiq.c: In function ‘enable_fiq’:
  arch/arm/kernel/fiq.c:127:19: error: ‘FIQ_START’ undeclared (first use in this function)
  arch/arm/kernel/fiq.c:127:19: note: each undeclared identifier is reported only once for each function it appears in
  arch/arm/kernel/fiq.c: In function ‘disable_fiq’:
  arch/arm/kernel/fiq.c:132:20: error: ‘FIQ_START’ undeclared (first use in this function)

The patch changes fiq code to have init_FIQ take FIQ_START from
platforms as a parameter and assign it to variable fiq_start which
is to replace FIQ_START uses in enable_fiq/disable_fiq.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Cc: Kukjin Kim <kgene.kim@samsung.com>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Rob Herring <rob.herring@calxeda.com>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/mach/irq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/mach/irq.h b/arch/arm/include/asm/mach/irq.h
index febe495d0c6e..15cb035309f7 100644
--- a/arch/arm/include/asm/mach/irq.h
+++ b/arch/arm/include/asm/mach/irq.h
@@ -17,7 +17,7 @@ struct seq_file;
 /*
  * This is internal.  Do not use it.
  */
-extern void init_FIQ(void);
+extern void init_FIQ(int);
 extern int show_fiq_list(struct seq_file *, int);
 
 #ifdef CONFIG_MULTI_IRQ_HANDLER
-- 
cgit 


From a5203c4ce6750730b1d95a8bc1e8214765450f7e Mon Sep 17 00:00:00 2001
From: Paul Bolle <pebolle@tiscali.nl>
Date: Sun, 8 Jul 2012 22:51:44 +0100
Subject: ARM: 7460/1: remove asm/locks.h

Commit 64ac24e738823161693bf791f87adc802cf529ff ("Generic semaphore
implementation") removed the last include of this header. Apparently it
was just an oversight to keep this header. It can safely be removed now.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/locks.h | 274 -------------------------------------------
 1 file changed, 274 deletions(-)
 delete mode 100644 arch/arm/include/asm/locks.h

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/locks.h b/arch/arm/include/asm/locks.h
deleted file mode 100644
index ef4c897772d1..000000000000
--- a/arch/arm/include/asm/locks.h
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- *  arch/arm/include/asm/locks.h
- *
- *  Copyright (C) 2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  Interrupt safe locking assembler. 
- */
-#ifndef __ASM_PROC_LOCKS_H
-#define __ASM_PROC_LOCKS_H
-
-#if __LINUX_ARM_ARCH__ >= 6
-
-#define __down_op(ptr,fail)			\
-	({					\
-	__asm__ __volatile__(			\
-	"@ down_op\n"				\
-"1:	ldrex	lr, [%0]\n"			\
-"	sub	lr, lr, %1\n"			\
-"	strex	ip, lr, [%0]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	teq	lr, #0\n"			\
-"	movmi	ip, %0\n"			\
-"	blmi	" #fail				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	})
-
-#define __down_op_ret(ptr,fail)			\
-	({					\
-		unsigned int ret;		\
-	__asm__ __volatile__(			\
-	"@ down_op_ret\n"			\
-"1:	ldrex	lr, [%1]\n"			\
-"	sub	lr, lr, %2\n"			\
-"	strex	ip, lr, [%1]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	teq	lr, #0\n"			\
-"	movmi	ip, %1\n"			\
-"	movpl	ip, #0\n"			\
-"	blmi	" #fail "\n"			\
-"	mov	%0, ip"				\
-	: "=&r" (ret)				\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	ret;					\
-	})
-
-#define __up_op(ptr,wake)			\
-	({					\
-	smp_mb();				\
-	__asm__ __volatile__(			\
-	"@ up_op\n"				\
-"1:	ldrex	lr, [%0]\n"			\
-"	add	lr, lr, %1\n"			\
-"	strex	ip, lr, [%0]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	cmp	lr, #0\n"			\
-"	movle	ip, %0\n"			\
-"	blle	" #wake				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	})
-
-/*
- * The value 0x01000000 supports up to 128 processors and
- * lots of processes.  BIAS must be chosen such that sub'ing
- * BIAS once per CPU will result in the long remaining
- * negative.
- */
-#define RW_LOCK_BIAS      0x01000000
-#define RW_LOCK_BIAS_STR "0x01000000"
-
-#define __down_op_write(ptr,fail)		\
-	({					\
-	__asm__ __volatile__(			\
-	"@ down_op_write\n"			\
-"1:	ldrex	lr, [%0]\n"			\
-"	sub	lr, lr, %1\n"			\
-"	strex	ip, lr, [%0]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	teq	lr, #0\n"			\
-"	movne	ip, %0\n"			\
-"	blne	" #fail				\
-	:					\
-	: "r" (ptr), "I" (RW_LOCK_BIAS)		\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	})
-
-#define __up_op_write(ptr,wake)			\
-	({					\
-	smp_mb();				\
-	__asm__ __volatile__(			\
-	"@ up_op_write\n"			\
-"1:	ldrex	lr, [%0]\n"			\
-"	adds	lr, lr, %1\n"			\
-"	strex	ip, lr, [%0]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	movcs	ip, %0\n"			\
-"	blcs	" #wake				\
-	:					\
-	: "r" (ptr), "I" (RW_LOCK_BIAS)		\
-	: "ip", "lr", "cc");			\
-	})
-
-#define __down_op_read(ptr,fail)		\
-	__down_op(ptr, fail)
-
-#define __up_op_read(ptr,wake)			\
-	({					\
-	smp_mb();				\
-	__asm__ __volatile__(			\
-	"@ up_op_read\n"			\
-"1:	ldrex	lr, [%0]\n"			\
-"	add	lr, lr, %1\n"			\
-"	strex	ip, lr, [%0]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	teq	lr, #0\n"			\
-"	moveq	ip, %0\n"			\
-"	bleq	" #wake				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	})
-
-#else
-
-#define __down_op(ptr,fail)			\
-	({					\
-	__asm__ __volatile__(			\
-	"@ down_op\n"				\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%0]\n"			\
-"	subs	lr, lr, %1\n"			\
-"	str	lr, [%0]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	movmi	ip, %0\n"			\
-"	blmi	" #fail				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	})
-
-#define __down_op_ret(ptr,fail)			\
-	({					\
-		unsigned int ret;		\
-	__asm__ __volatile__(			\
-	"@ down_op_ret\n"			\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%1]\n"			\
-"	subs	lr, lr, %2\n"			\
-"	str	lr, [%1]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	movmi	ip, %1\n"			\
-"	movpl	ip, #0\n"			\
-"	blmi	" #fail "\n"			\
-"	mov	%0, ip"				\
-	: "=&r" (ret)				\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	ret;					\
-	})
-
-#define __up_op(ptr,wake)			\
-	({					\
-	smp_mb();				\
-	__asm__ __volatile__(			\
-	"@ up_op\n"				\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%0]\n"			\
-"	adds	lr, lr, %1\n"			\
-"	str	lr, [%0]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	movle	ip, %0\n"			\
-"	blle	" #wake				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	})
-
-/*
- * The value 0x01000000 supports up to 128 processors and
- * lots of processes.  BIAS must be chosen such that sub'ing
- * BIAS once per CPU will result in the long remaining
- * negative.
- */
-#define RW_LOCK_BIAS      0x01000000
-#define RW_LOCK_BIAS_STR "0x01000000"
-
-#define __down_op_write(ptr,fail)		\
-	({					\
-	__asm__ __volatile__(			\
-	"@ down_op_write\n"			\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%0]\n"			\
-"	subs	lr, lr, %1\n"			\
-"	str	lr, [%0]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	movne	ip, %0\n"			\
-"	blne	" #fail				\
-	:					\
-	: "r" (ptr), "I" (RW_LOCK_BIAS)		\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	})
-
-#define __up_op_write(ptr,wake)			\
-	({					\
-	__asm__ __volatile__(			\
-	"@ up_op_write\n"			\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%0]\n"			\
-"	adds	lr, lr, %1\n"			\
-"	str	lr, [%0]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	movcs	ip, %0\n"			\
-"	blcs	" #wake				\
-	:					\
-	: "r" (ptr), "I" (RW_LOCK_BIAS)		\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	})
-
-#define __down_op_read(ptr,fail)		\
-	__down_op(ptr, fail)
-
-#define __up_op_read(ptr,wake)			\
-	({					\
-	smp_mb();				\
-	__asm__ __volatile__(			\
-	"@ up_op_read\n"			\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%0]\n"			\
-"	adds	lr, lr, %1\n"			\
-"	str	lr, [%0]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	moveq	ip, %0\n"			\
-"	bleq	" #wake				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	})
-
-#endif
-
-#endif
-- 
cgit 


From 546c2896a42202dbc7d02f7c6ec9948ac1bf511b Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 6 Jul 2012 15:43:41 +0100
Subject: ARM: 7446/1: spinlock: use ticket algorithm for ARMv6+ locking
 implementation

Ticket spinlocks ensure locking fairness by introducing a FIFO-like
nature to the granting of lock acquisitions and also reducing the
thundering herd effect when spinning on a lock by allowing the cacheline
to remain in a shared state amongst the waiting CPUs. This is especially
important on systems where memory-access times are not necessarily
uniform when accessing the lock structure (for example, on a
multi-cluster platform where the lock is allocated into L1 when a CPU
releases it).

This patch implements the ticket spinlock algorithm for ARM, replacing
the simpler implementation for ARMv6+ processors.

Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/spinlock.h       | 72 +++++++++++++++++++++++------------
 arch/arm/include/asm/spinlock_types.h | 17 ++++++++-
 2 files changed, 63 insertions(+), 26 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 65fa3c88095c..0da2effd4b37 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -59,18 +59,13 @@ static inline void dsb_sev(void)
 }
 
 /*
- * ARMv6 Spin-locking.
+ * ARMv6 ticket-based spin-locking.
  *
- * We exclusively read the old value.  If it is zero, we may have
- * won the lock, so we try exclusively storing it.  A memory barrier
- * is required after we get a lock, and before we release it, because
- * V6 CPUs are assumed to have weakly ordered memory.
- *
- * Unlocked value: 0
- * Locked value: 1
+ * A memory barrier is required after we get a lock, and before we
+ * release it, because V6 CPUs are assumed to have weakly ordered
+ * memory.
  */
 
-#define arch_spin_is_locked(x)		((x)->lock != 0)
 #define arch_spin_unlock_wait(lock) \
 	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
 
@@ -79,31 +74,39 @@ static inline void dsb_sev(void)
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
+	u32 newval;
+	arch_spinlock_t lockval;
 
 	__asm__ __volatile__(
-"1:	ldrex	%0, [%1]\n"
-"	teq	%0, #0\n"
-	WFE("ne")
-"	strexeq	%0, %2, [%1]\n"
-"	teqeq	%0, #0\n"
+"1:	ldrex	%0, [%3]\n"
+"	add	%1, %0, %4\n"
+"	strex	%2, %1, [%3]\n"
+"	teq	%2, #0\n"
 "	bne	1b"
-	: "=&r" (tmp)
-	: "r" (&lock->lock), "r" (1)
+	: "=&r" (lockval), "=&r" (newval), "=&r" (tmp)
+	: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
 	: "cc");
 
+	while (lockval.tickets.next != lockval.tickets.owner) {
+		wfe();
+		lockval.tickets.owner = ACCESS_ONCE(lock->tickets.owner);
+	}
+
 	smp_mb();
 }
 
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
+	u32 slock;
 
 	__asm__ __volatile__(
-"	ldrex	%0, [%1]\n"
-"	teq	%0, #0\n"
-"	strexeq	%0, %2, [%1]"
-	: "=&r" (tmp)
-	: "r" (&lock->lock), "r" (1)
+"	ldrex	%0, [%2]\n"
+"	subs	%1, %0, %0, ror #16\n"
+"	addeq	%0, %0, %3\n"
+"	strexeq	%1, %0, [%2]"
+	: "=&r" (slock), "=&r" (tmp)
+	: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
 	: "cc");
 
 	if (tmp == 0) {
@@ -116,17 +119,38 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
+	unsigned long tmp;
+	u32 slock;
+
 	smp_mb();
 
 	__asm__ __volatile__(
-"	str	%1, [%0]\n"
-	:
-	: "r" (&lock->lock), "r" (0)
+"	mov	%1, #1\n"
+"1:	ldrex	%0, [%2]\n"
+"	uadd16	%0, %0, %1\n"
+"	strex	%1, %0, [%2]\n"
+"	teq	%1, #0\n"
+"	bne	1b"
+	: "=&r" (slock), "=&r" (tmp)
+	: "r" (&lock->slock)
 	: "cc");
 
 	dsb_sev();
 }
 
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+	struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets);
+	return tickets.owner != tickets.next;
+}
+
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
+{
+	struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets);
+	return (tickets.next - tickets.owner) > 1;
+}
+#define arch_spin_is_contended	arch_spin_is_contended
+
 /*
  * RWLOCKS
  *
diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h
index d14d197ae04a..b262d2f8b478 100644
--- a/arch/arm/include/asm/spinlock_types.h
+++ b/arch/arm/include/asm/spinlock_types.h
@@ -5,11 +5,24 @@
 # error "please don't include this file directly"
 #endif
 
+#define TICKET_SHIFT	16
+
 typedef struct {
-	volatile unsigned int lock;
+	union {
+		u32 slock;
+		struct __raw_tickets {
+#ifdef __ARMEB__
+			u16 next;
+			u16 owner;
+#else
+			u16 owner;
+			u16 next;
+#endif
+		} tickets;
+	};
 } arch_spinlock_t;
 
-#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ { 0 } }
 
 typedef struct {
 	volatile unsigned int lock;
-- 
cgit 


From 881ccccb6bd3f0e1fff8b9addbe0de90e0b16166 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 6 Jul 2012 15:44:13 +0100
Subject: ARM: 7447/1: rwlocks: remove unused branch labels from trylock
 routines

The ARM arch_{read,write}_trylock implementations include unused
backwards branch labels, since we don't retry the locking operation
if the exclusive store fails.

This patch removes the labels.

Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/spinlock.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 0da2effd4b37..b4ca707d0a69 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -182,7 +182,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 	unsigned long tmp;
 
 	__asm__ __volatile__(
-"1:	ldrex	%0, [%1]\n"
+"	ldrex	%0, [%1]\n"
 "	teq	%0, #0\n"
 "	strexeq	%0, %2, [%1]"
 	: "=&r" (tmp)
@@ -268,7 +268,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 	unsigned long tmp, tmp2 = 1;
 
 	__asm__ __volatile__(
-"1:	ldrex	%0, [%2]\n"
+"	ldrex	%0, [%2]\n"
 "	adds	%0, %0, #1\n"
 "	strexpl	%1, %0, [%2]\n"
 	: "=&r" (tmp), "+r" (tmp2)
-- 
cgit 


From 4295b898f5a5c7e62ae68e7a4ecc4b414622ffe6 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 6 Jul 2012 15:45:00 +0100
Subject: ARM: 7448/1: perf: remove arm_perf_pmu_ids global enumeration

In order to provide PMU name strings compatible with the OProfile
user ABI, an enumeration of all PMUs is currently used by perf to
identify each PMU uniquely. Unfortunately, this does not scale well
in the presence of multiple PMUs and creates a single, global namespace
across all PMUs in the system.

This patch removes the enumeration and instead uses the name string
for the PMU to map onto the OProfile variant. perf_pmu_name is
implemented for CPU PMUs, which is all that OProfile cares about anyway.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/perf_event.h | 17 +----------------
 arch/arm/include/asm/pmu.h        |  3 +--
 2 files changed, 2 insertions(+), 18 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index 00cbe10a50e3..e074948d8143 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -12,21 +12,6 @@
 #ifndef __ARM_PERF_EVENT_H__
 #define __ARM_PERF_EVENT_H__
 
-/* ARM perf PMU IDs for use by internal perf clients. */
-enum arm_perf_pmu_ids {
-	ARM_PERF_PMU_ID_XSCALE1	= 0,
-	ARM_PERF_PMU_ID_XSCALE2,
-	ARM_PERF_PMU_ID_V6,
-	ARM_PERF_PMU_ID_V6MP,
-	ARM_PERF_PMU_ID_CA8,
-	ARM_PERF_PMU_ID_CA9,
-	ARM_PERF_PMU_ID_CA5,
-	ARM_PERF_PMU_ID_CA15,
-	ARM_PERF_PMU_ID_CA7,
-	ARM_NUM_PMU_IDS,
-};
-
-extern enum arm_perf_pmu_ids
-armpmu_get_pmu_id(void);
+/* Nothing to see here... */
 
 #endif /* __ARM_PERF_EVENT_H__ */
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index 90114faa9f3c..4432305f4a2a 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -103,10 +103,9 @@ struct pmu_hw_events {
 
 struct arm_pmu {
 	struct pmu	pmu;
-	enum arm_perf_pmu_ids id;
 	enum arm_pmu_type type;
 	cpumask_t	active_irqs;
-	const char	*name;
+	char		*name;
 	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
 	void		(*enable)(struct hw_perf_event *evt, int idx);
 	void		(*disable)(struct hw_perf_event *evt, int idx);
-- 
cgit 


From 8c56cc8be5b38e3684eba96dc9b3f7ca7e495755 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 6 Jul 2012 15:45:39 +0100
Subject: ARM: 7449/1: use generic strnlen_user and strncpy_from_user functions

This patch implements the word-at-a-time interface for ARM using the
same algorithm as x86. We use the fls macro from ARMv5 onwards, where
we have a clz instruction available which saves us a mov instruction
when targetting Thumb-2. For older CPUs, we use the magic 0x0ff0001
constant. Big-endian configurations make use of the implementation from
asm-generic.

With this implemented, we can replace our byte-at-a-time strnlen_user
and strncpy_from_user functions with the optimised generic versions.

Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/uaccess.h        | 27 ++++-------------
 arch/arm/include/asm/word-at-a-time.h | 55 +++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+), 21 deletions(-)
 create mode 100644 arch/arm/include/asm/word-at-a-time.h

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 71f6536d17ac..479a6352e0b5 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -189,6 +189,9 @@ static inline void set_fs(mm_segment_t fs)
 
 #define access_ok(type,addr,size)	(__range_ok(addr,size) == 0)
 
+#define user_addr_max() \
+	(segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL)
+
 /*
  * The "__xxx" versions of the user access functions do not verify the
  * address space - it must have been done previously with a separate
@@ -398,9 +401,6 @@ extern unsigned long __must_check __clear_user_std(void __user *addr, unsigned l
 #define __clear_user(addr,n)		(memset((void __force *)addr, 0, n), 0)
 #endif
 
-extern unsigned long __must_check __strncpy_from_user(char *to, const char __user *from, unsigned long count);
-extern unsigned long __must_check __strnlen_user(const char __user *s, long n);
-
 static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	if (access_ok(VERIFY_READ, from, n))
@@ -427,24 +427,9 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo
 	return n;
 }
 
-static inline long __must_check strncpy_from_user(char *dst, const char __user *src, long count)
-{
-	long res = -EFAULT;
-	if (access_ok(VERIFY_READ, src, 1))
-		res = __strncpy_from_user(dst, src, count);
-	return res;
-}
-
-#define strlen_user(s)	strnlen_user(s, ~0UL >> 1)
+extern long strncpy_from_user(char *dest, const char __user *src, long count);
 
-static inline long __must_check strnlen_user(const char __user *s, long n)
-{
-	unsigned long res = 0;
-
-	if (__addr_ok(s))
-		res = __strnlen_user(s, n);
-
-	return res;
-}
+extern __must_check long strlen_user(const char __user *str);
+extern __must_check long strnlen_user(const char __user *str, long n);
 
 #endif /* _ASMARM_UACCESS_H */
diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h
new file mode 100644
index 000000000000..74b2d4578577
--- /dev/null
+++ b/arch/arm/include/asm/word-at-a-time.h
@@ -0,0 +1,55 @@
+#ifndef __ASM_ARM_WORD_AT_A_TIME_H
+#define __ASM_ARM_WORD_AT_A_TIME_H
+
+#ifndef __ARMEB__
+
+/*
+ * Little-endian word-at-a-time zero byte handling.
+ * Heavily based on the x86 algorithm.
+ */
+#include <linux/kernel.h>
+
+struct word_at_a_time {
+	const unsigned long one_bits, high_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
+
+static inline unsigned long has_zero(unsigned long a, unsigned long *bits,
+				     const struct word_at_a_time *c)
+{
+	unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
+	*bits = mask;
+	return mask;
+}
+
+#define prep_zero_mask(a, bits, c) (bits)
+
+static inline unsigned long create_zero_mask(unsigned long bits)
+{
+	bits = (bits - 1) & ~bits;
+	return bits >> 7;
+}
+
+static inline unsigned long find_zero(unsigned long mask)
+{
+	unsigned long ret;
+
+#if __LINUX_ARM_ARCH__ >= 5
+	/* We have clz available. */
+	ret = fls(mask) >> 3;
+#else
+	/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
+	ret = (0x0ff0001 + mask) >> 23;
+	/* Fix the 1 for 00 case */
+	ret &= mask;
+#endif
+
+	return ret;
+}
+
+#else	/* __ARMEB__ */
+#include <asm-generic/word-at-a-time.h>
+#endif
+
+#endif /* __ASM_ARM_WORD_AT_A_TIME_H */
-- 
cgit 


From b9a50f74905ad9126c91b495ece8a5f45434c643 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 6 Jul 2012 15:46:08 +0100
Subject: ARM: 7450/1: dcache: select DCACHE_WORD_ACCESS for little-endian
 ARMv6+ CPUs

DCACHE_WORD_ACCESS uses the word-at-a-time API for optimised string
comparisons in the vfs layer.

This patch implements support for load_unaligned_zeropad for ARM CPUs
with native support for unaligned memory accesses (v6+) when running
little-endian.

Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/word-at-a-time.h | 41 +++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h
index 74b2d4578577..4d52f92967a6 100644
--- a/arch/arm/include/asm/word-at-a-time.h
+++ b/arch/arm/include/asm/word-at-a-time.h
@@ -48,6 +48,47 @@ static inline unsigned long find_zero(unsigned long mask)
 	return ret;
 }
 
+#ifdef CONFIG_DCACHE_WORD_ACCESS
+
+#define zero_bytemask(mask) (mask)
+
+/*
+ * Load an unaligned word from kernel space.
+ *
+ * In the (very unlikely) case of the word being a page-crosser
+ * and the next page not being mapped, take the exception and
+ * return zeroes in the non-existing part.
+ */
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+	unsigned long ret, offset;
+
+	/* Load word from unaligned pointer addr */
+	asm(
+	"1:	ldr	%0, [%2]\n"
+	"2:\n"
+	"	.pushsection .fixup,\"ax\"\n"
+	"	.align 2\n"
+	"3:	and	%1, %2, #0x3\n"
+	"	bic	%2, %2, #0x3\n"
+	"	ldr	%0, [%2]\n"
+	"	lsl	%1, %1, #0x3\n"
+	"	lsr	%0, %0, %1\n"
+	"	b	2b\n"
+	"	.popsection\n"
+	"	.pushsection __ex_table,\"a\"\n"
+	"	.align	3\n"
+	"	.long	1b, 3b\n"
+	"	.popsection"
+	: "=&r" (ret), "=&r" (offset)
+	: "r" (addr), "Qo" (*(unsigned long *)addr));
+
+	return ret;
+}
+
+
+#endif	/* DCACHE_WORD_ACCESS */
+
 #else	/* __ARMEB__ */
 #include <asm-generic/word-at-a-time.h>
 #endif
-- 
cgit 


From 923df96b9f31b7d08d8438ff9677326d9537accf Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 6 Jul 2012 15:46:45 +0100
Subject: ARM: 7451/1: arch timer: implement read_current_timer and get_cycles

This patch implements read_current_timer using the architected timers
when they are selected via CONFIG_ARM_ARCH_TIMER. If they are detected
not to be usable at runtime, we return -ENXIO to the caller.

Furthermore, if read_current_timer is exported then we can implement
get_cycles in terms of it for use as both an entropy source and for
implementing __udelay and friends.

Tested-by: Shinya Kuribayashi <shinya.kuribayashi.px@renesas.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/arch_timer.h |  3 +++
 arch/arm/include/asm/timex.h      | 10 ++++++----
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index ed2e95d46e29..62e75475e57e 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -1,7 +1,10 @@
 #ifndef __ASMARM_ARCH_TIMER_H
 #define __ASMARM_ARCH_TIMER_H
 
+#include <asm/errno.h>
+
 #ifdef CONFIG_ARM_ARCH_TIMER
+#define ARCH_HAS_READ_CURRENT_TIMER
 int arch_timer_of_register(void);
 int arch_timer_sched_clock_init(void);
 #else
diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h
index 3be8de3adaba..ce119442277c 100644
--- a/arch/arm/include/asm/timex.h
+++ b/arch/arm/include/asm/timex.h
@@ -12,13 +12,15 @@
 #ifndef _ASMARM_TIMEX_H
 #define _ASMARM_TIMEX_H
 
+#include <asm/arch_timer.h>
 #include <mach/timex.h>
 
 typedef unsigned long cycles_t;
 
-static inline cycles_t get_cycles (void)
-{
-	return 0;
-}
+#ifdef ARCH_HAS_READ_CURRENT_TIMER
+#define get_cycles()	({ cycles_t c; read_current_timer(&c) ? 0 : c; })
+#else
+#define get_cycles()	(0)
+#endif
 
 #endif
-- 
cgit 


From d0a533b18235d36206b9b422efadb7cee444dfdb Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 6 Jul 2012 15:47:17 +0100
Subject: ARM: 7452/1: delay: allow timer-based delay implementation to be
 selected

This patch allows a timer-based delay implementation to be selected by
switching the delay routines over to use get_cycles, which is
implemented in terms of read_current_timer. This further allows us to
skip the loop calibration and have a consistent delay function in the
face of core frequency scaling.

To avoid the pain of dealing with memory-mapped counters, this
implementation uses the co-processor interface to the architected timers
when they are available. The previous loop-based implementation is
kept around for CPUs without the architected timers and we retain both
the maximum delay (2ms) and the corresponding conversion factors for
determining the number of loops required for a given interval. Since the
indirection of the timer routines will only work when called from C,
the sa1100 sleep routines are modified to branch to the loop-based delay
functions directly.

Tested-by: Shinya Kuribayashi <shinya.kuribayashi.px@renesas.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/delay.h | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/delay.h b/arch/arm/include/asm/delay.h
index b2deda181549..dc6145120de3 100644
--- a/arch/arm/include/asm/delay.h
+++ b/arch/arm/include/asm/delay.h
@@ -6,9 +6,22 @@
 #ifndef __ASM_ARM_DELAY_H
 #define __ASM_ARM_DELAY_H
 
+#include <asm/memory.h>
 #include <asm/param.h>	/* HZ */
 
-extern void __delay(int loops);
+#define MAX_UDELAY_MS	2
+#define UDELAY_MULT	((UL(2199023) * HZ) >> 11)
+#define UDELAY_SHIFT	30
+
+#ifndef __ASSEMBLY__
+
+extern struct arm_delay_ops {
+	void (*delay)(unsigned long);
+	void (*const_udelay)(unsigned long);
+	void (*udelay)(unsigned long);
+} arm_delay_ops;
+
+#define __delay(n)		arm_delay_ops.delay(n)
 
 /*
  * This function intentionally does not exist; if you see references to
@@ -23,22 +36,27 @@ extern void __bad_udelay(void);
  * division by multiplication: you don't have to worry about
  * loss of precision.
  *
- * Use only for very small delays ( < 1 msec).  Should probably use a
+ * Use only for very small delays ( < 2 msec).  Should probably use a
  * lookup table, really, as the multiplications take much too long with
  * short delays.  This is a "reasonable" implementation, though (and the
  * first constant multiplications gets optimized away if the delay is
  * a constant)
  */
-extern void __udelay(unsigned long usecs);
-extern void __const_udelay(unsigned long);
-
-#define MAX_UDELAY_MS 2
+#define __udelay(n)		arm_delay_ops.udelay(n)
+#define __const_udelay(n)	arm_delay_ops.const_udelay(n)
 
 #define udelay(n)							\
 	(__builtin_constant_p(n) ?					\
 	  ((n) > (MAX_UDELAY_MS * 1000) ? __bad_udelay() :		\
-			__const_udelay((n) * ((2199023U*HZ)>>11))) :	\
+			__const_udelay((n) * UDELAY_MULT)) :		\
 	  __udelay(n))
 
+/* Loop-based definitions for assembly code. */
+extern void __loop_delay(unsigned long loops);
+extern void __loop_udelay(unsigned long usecs);
+extern void __loop_const_udelay(unsigned long);
+
+#endif /* __ASSEMBLY__ */
+
 #endif /* defined(_ARM_DELAY_H) */
 
-- 
cgit 


From 4b7a7d889007ec22d4df741cc08ef79583d11189 Mon Sep 17 00:00:00 2001
From: Cong Wang <amwang@redhat.com>
Date: Fri, 25 Nov 2011 22:53:10 +0800
Subject: arm: remove km_type definitions

Signed-off-by: Cong Wang <amwang@redhat.com>
---
 arch/arm/include/asm/kmap_types.h | 26 +-------------------------
 1 file changed, 1 insertion(+), 25 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/kmap_types.h b/arch/arm/include/asm/kmap_types.h
index e51b1e81df05..83eb2f772911 100644
--- a/arch/arm/include/asm/kmap_types.h
+++ b/arch/arm/include/asm/kmap_types.h
@@ -4,30 +4,6 @@
 /*
  * This is the "bare minimum".  AIO seems to require this.
  */
-enum km_type {
-	KM_BOUNCE_READ,
-	KM_SKB_SUNRPC_DATA,
-	KM_SKB_DATA_SOFTIRQ,
-	KM_USER0,
-	KM_USER1,
-	KM_BIO_SRC_IRQ,
-	KM_BIO_DST_IRQ,
-	KM_PTE0,
-	KM_PTE1,
-	KM_IRQ0,
-	KM_IRQ1,
-	KM_SOFTIRQ0,
-	KM_SOFTIRQ1,
-	KM_L1_CACHE,
-	KM_L2_CACHE,
-	KM_KDB,
-	KM_TYPE_NR
-};
-
-#ifdef CONFIG_DEBUG_HIGHMEM
-#define KM_NMI		(-1)
-#define KM_NMI_PTE	(-1)
-#define KM_IRQ_PTE	(-1)
-#endif
+#define KM_TYPE_NR 16
 
 #endif
-- 
cgit 


From a5d5f7daa744b34477c4a12728bde0a1694a1707 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 12 Jul 2012 23:57:35 +0100
Subject: ARM: 7465/1: Handle >4GB memory sizes in device tree and
 mem=size@start option

The memory regions which are passed to arm_add_memory() from
device tree blobs via early_init_dt_add_memory_arch() can
have sizes which are larger than will fit in a 32 bit integer,
so switch to using a phys_addr_t to hold them, to avoid
silently dropping the top 32 bits of the size. Similarly, use
phys_addr_t in early_mem() so that mem=size@start command line
options specifying more than 4GB behave sensibly.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/setup.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h
index 23ebc0c82a39..24d284a1bfc7 100644
--- a/arch/arm/include/asm/setup.h
+++ b/arch/arm/include/asm/setup.h
@@ -196,7 +196,7 @@ static const struct tagtable __tagtable_##fn __tag = { tag, fn }
 
 struct membank {
 	phys_addr_t start;
-	unsigned long size;
+	phys_addr_t size;
 	unsigned int highmem;
 };
 
@@ -217,7 +217,7 @@ extern struct meminfo meminfo;
 #define bank_phys_end(bank)	((bank)->start + (bank)->size)
 #define bank_phys_size(bank)	(bank)->size
 
-extern int arm_add_memory(phys_addr_t start, unsigned long size);
+extern int arm_add_memory(phys_addr_t start, phys_addr_t size);
 extern void early_print(const char *str, ...);
 extern void dump_machine_table(void);
 
-- 
cgit 


From e9da6e9905e639b0f842a244bc770b48ad0523e9 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Mon, 30 Jul 2012 09:11:33 +0200
Subject: ARM: dma-mapping: remove custom consistent dma region

This patch changes dma-mapping subsystem to use generic vmalloc areas
for all consistent dma allocations. This increases the total size limit
of the consistent allocations and removes platform hacks and a lot of
duplicated code.

Atomic allocations are served from special pool preallocated on boot,
because vmalloc areas cannot be reliably created in atomic context.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Kyungmin Park <kyungmin.park@samsung.com>
Reviewed-by: Minchan Kim <minchan@kernel.org>
---
 arch/arm/include/asm/dma-mapping.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index bbef15d04890..80777d871422 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -226,7 +226,7 @@ static inline int dma_mmap_writecombine(struct device *dev, struct vm_area_struc
  * DMA region above it's default value of 2MB. It must be called before the
  * memory allocator is initialised, i.e. before any core_initcall.
  */
-extern void __init init_consistent_dma_size(unsigned long size);
+static inline void init_consistent_dma_size(unsigned long size) { }
 
 /*
  * For SA-1111, IXP425, and ADI systems  the dma-mapping functions are "magic"
-- 
cgit 


From 64ccc9c033c6089b2d426dad3c56477ab066c999 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 14 Jun 2012 13:03:04 +0200
Subject: common: dma-mapping: add support for generic dma_mmap_* calls

Commit 9adc5374 ('common: dma-mapping: introduce mmap method') added a
generic method for implementing mmap user call to dma_map_ops structure.

This patch converts ARM and PowerPC architectures (the only providers of
dma_mmap_coherent/dma_mmap_writecombine calls) to use this generic
dma_map_ops based call and adds a generic cross architecture
definition for dma_mmap_attrs, dma_mmap_coherent, dma_mmap_writecombine
functions.

The generic mmap virt_to_page-based fallback implementation is provided for
architectures which don't provide their own implementation for mmap method.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Kyungmin Park <kyungmin.park@samsung.com>
---
 arch/arm/include/asm/dma-mapping.h | 19 -------------------
 1 file changed, 19 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index 80777d871422..a04803331144 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -186,17 +186,6 @@ extern int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 			void *cpu_addr, dma_addr_t dma_addr, size_t size,
 			struct dma_attrs *attrs);
 
-#define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, NULL)
-
-static inline int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
-				  void *cpu_addr, dma_addr_t dma_addr,
-				  size_t size, struct dma_attrs *attrs)
-{
-	struct dma_map_ops *ops = get_dma_ops(dev);
-	BUG_ON(!ops);
-	return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
-}
-
 static inline void *dma_alloc_writecombine(struct device *dev, size_t size,
 				       dma_addr_t *dma_handle, gfp_t flag)
 {
@@ -213,14 +202,6 @@ static inline void dma_free_writecombine(struct device *dev, size_t size,
 	return dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs);
 }
 
-static inline int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
-		      void *cpu_addr, dma_addr_t dma_addr, size_t size)
-{
-	DEFINE_DMA_ATTRS(attrs);
-	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
-	return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs);
-}
-
 /*
  * This can be called during boot to increase the size of the consistent
  * DMA region above it's default value of 2MB. It must be called before the
-- 
cgit 


From dc2832e1e7db3f9ad465d2fe894bd69ef05d1e4b Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Wed, 13 Jun 2012 10:01:15 +0200
Subject: ARM: dma-mapping: add support for dma_get_sgtable()

This patch adds support for dma_get_sgtable() function which is required
to let drivers to share the buffers allocated by DMA-mapping subsystem.

Generic implementation based on virt_to_page() is not suitable for ARM
dma-mapping subsystem.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Kyungmin Park <kyungmin.park@samsung.com>
---
 arch/arm/include/asm/dma-mapping.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index a04803331144..2ae842df4551 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -261,6 +261,9 @@ extern void arm_dma_sync_sg_for_cpu(struct device *, struct scatterlist *, int,
 		enum dma_data_direction);
 extern void arm_dma_sync_sg_for_device(struct device *, struct scatterlist *, int,
 		enum dma_data_direction);
+extern int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
+		void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		struct dma_attrs *attrs);
 
 #endif /* __KERNEL__ */
 #endif
-- 
cgit 


From c1d7e01d7877a397655277a920aeaa3830ed9461 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 30 Jul 2012 14:42:46 -0700
Subject: ipc: use Kconfig options for __ARCH_WANT_[COMPAT_]IPC_PARSE_VERSION

Rather than #define the options manually in the architecture code, add
Kconfig options for them and select them there instead.  This also allows
us to select the compat IPC version parsing automatically for platforms
using the old compat IPC interface.

Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/include/asm/unistd.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 512cd1473454..0cab47d4a83f 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -446,7 +446,6 @@
 
 #ifdef __KERNEL__
 
-#define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_GETHOSTNAME
 #define __ARCH_WANT_SYS_PAUSE
-- 
cgit 


From a76d7bd96d65fa5119adba97e1b58d95f2e78829 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 13 Jul 2012 19:15:40 +0100
Subject: ARM: 7467/1: mutex: use generic xchg-based implementation for ARMv6+

The open-coded mutex implementation for ARMv6+ cores suffers from a
severe lack of barriers, so in the uncontended case we don't actually
protect any accesses performed during the critical section.

Furthermore, the code is largely a duplication of the ARMv6+ atomic_dec
code but optimised to remove a branch instruction, as the mutex fastpath
was previously inlined. Now that this is executed out-of-line, we can
reuse the atomic access code for the locking (in fact, we use the xchg
code as this produces shorter critical sections).

This patch uses the generic xchg based implementation for mutexes on
ARMv6+, which introduces barriers to the lock/unlock operations and also
has the benefit of removing a fair amount of inline assembly code.

Cc: <stable@vger.kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Nicolas Pitre <nico@linaro.org>
Reported-by: Shan Kang <kangshan0910@gmail.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/mutex.h | 119 ++-----------------------------------------
 1 file changed, 4 insertions(+), 115 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/mutex.h b/arch/arm/include/asm/mutex.h
index 93226cf23ae0..b1479fd04a95 100644
--- a/arch/arm/include/asm/mutex.h
+++ b/arch/arm/include/asm/mutex.h
@@ -7,121 +7,10 @@
  */
 #ifndef _ASM_MUTEX_H
 #define _ASM_MUTEX_H
-
-#if __LINUX_ARM_ARCH__ < 6
-/* On pre-ARMv6 hardware the swp based implementation is the most efficient. */
-# include <asm-generic/mutex-xchg.h>
-#else
-
 /*
- * Attempting to lock a mutex on ARMv6+ can be done with a bastardized
- * atomic decrement (it is not a reliable atomic decrement but it satisfies
- * the defined semantics for our purpose, while being smaller and faster
- * than a real atomic decrement or atomic swap.  The idea is to attempt
- * decrementing the lock value only once.  If once decremented it isn't zero,
- * or if its store-back fails due to a dispute on the exclusive store, we
- * simply bail out immediately through the slow path where the lock will be
- * reattempted until it succeeds.
+ * On pre-ARMv6 hardware this results in a swp-based implementation,
+ * which is the most efficient. For ARMv6+, we emit a pair of exclusive
+ * accesses instead.
  */
-static inline void
-__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
-	int __ex_flag, __res;
-
-	__asm__ (
-
-		"ldrex	%0, [%2]	\n\t"
-		"sub	%0, %0, #1	\n\t"
-		"strex	%1, %0, [%2]	"
-
-		: "=&r" (__res), "=&r" (__ex_flag)
-		: "r" (&(count)->counter)
-		: "cc","memory" );
-
-	__res |= __ex_flag;
-	if (unlikely(__res != 0))
-		fail_fn(count);
-}
-
-static inline int
-__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
-	int __ex_flag, __res;
-
-	__asm__ (
-
-		"ldrex	%0, [%2]	\n\t"
-		"sub	%0, %0, #1	\n\t"
-		"strex	%1, %0, [%2]	"
-
-		: "=&r" (__res), "=&r" (__ex_flag)
-		: "r" (&(count)->counter)
-		: "cc","memory" );
-
-	__res |= __ex_flag;
-	if (unlikely(__res != 0))
-		__res = fail_fn(count);
-	return __res;
-}
-
-/*
- * Same trick is used for the unlock fast path. However the original value,
- * rather than the result, is used to test for success in order to have
- * better generated assembly.
- */
-static inline void
-__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
-	int __ex_flag, __res, __orig;
-
-	__asm__ (
-
-		"ldrex	%0, [%3]	\n\t"
-		"add	%1, %0, #1	\n\t"
-		"strex	%2, %1, [%3]	"
-
-		: "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
-		: "r" (&(count)->counter)
-		: "cc","memory" );
-
-	__orig |= __ex_flag;
-	if (unlikely(__orig != 0))
-		fail_fn(count);
-}
-
-/*
- * If the unlock was done on a contended lock, or if the unlock simply fails
- * then the mutex remains locked.
- */
-#define __mutex_slowpath_needs_to_unlock()	1
-
-/*
- * For __mutex_fastpath_trylock we use another construct which could be
- * described as a "single value cmpxchg".
- *
- * This provides the needed trylock semantics like cmpxchg would, but it is
- * lighter and less generic than a true cmpxchg implementation.
- */
-static inline int
-__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
-	int __ex_flag, __res, __orig;
-
-	__asm__ (
-
-		"1: ldrex	%0, [%3]	\n\t"
-		"subs		%1, %0, #1	\n\t"
-		"strexeq	%2, %1, [%3]	\n\t"
-		"movlt		%0, #0		\n\t"
-		"cmpeq		%2, #0		\n\t"
-		"bgt		1b		"
-
-		: "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
-		: "r" (&count->counter)
-		: "cc", "memory" );
-
-	return __orig;
-}
-
-#endif
+#include <asm-generic/mutex-xchg.h>
 #endif
-- 
cgit 


From b74253f78400f9a4b42da84bb1de7540b88ce7c4 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 23 Jul 2012 14:18:13 +0100
Subject: ARM: 7479/1: mm: avoid NULL dereference when flushing gate_vma with
 VIVT caches

The vivt_flush_cache_{range,page} functions check that the mm_struct
of the VMA being flushed has been active on the current CPU before
performing the cache maintenance.

The gate_vma has a NULL mm_struct pointer and, as such, will cause a
kernel fault if we try to flush it with the above operations. This
happens during ELF core dumps, which include the gate_vma as it may be
useful for debugging purposes.

This patch adds checks to the VIVT cache flushing functions so that VMAs
with a NULL mm_struct are flushed unconditionally (the vectors page may
be dirty if we use it to store the current TLS pointer).

Cc: <stable@vger.kernel.org> # 3.4+
Reported-by: Gilles Chanteperdrix <gilles.chanteperdrix@xenomai.org>
Tested-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/cacheflush.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 004c1bc95d2b..e4448e16046d 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -215,7 +215,9 @@ static inline void vivt_flush_cache_mm(struct mm_struct *mm)
 static inline void
 vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
-	if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm)))
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)))
 		__cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end),
 					vma->vm_flags);
 }
@@ -223,7 +225,9 @@ vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned
 static inline void
 vivt_flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
 {
-	if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) {
 		unsigned long addr = user_addr & PAGE_MASK;
 		__cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags);
 	}
-- 
cgit 


From 237ec6f2e51d2fc2ff37c7c5f1ccc9264d09c85b Mon Sep 17 00:00:00 2001
From: Colin Cross <ccross@android.com>
Date: Tue, 7 Aug 2012 19:05:10 +0100
Subject: ARM: 7486/1: sched_clock: update epoch_cyc on resume

Many clocks that are used to provide sched_clock will reset during
suspend.  If read_sched_clock returns 0 after suspend, sched_clock will
appear to jump forward.  This patch resets cd.epoch_cyc to the current
value of read_sched_clock during resume, which causes sched_clock() just
after suspend to return the same value as sched_clock() just before
suspend.

In addition, during the window where epoch_ns has been updated before
suspend, but epoch_cyc has not been updated after suspend, it is unknown
whether the clock has reset or not, and sched_clock() could return a
bogus value.  Add a suspended flag, and return the pre-suspend epoch_ns
value during this period.

The new behavior is triggered by calling setup_sched_clock_needs_suspend
instead of setup_sched_clock.

Signed-off-by: Colin Cross <ccross@android.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/sched_clock.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/sched_clock.h b/arch/arm/include/asm/sched_clock.h
index e3f757263438..05b8e82ec9f5 100644
--- a/arch/arm/include/asm/sched_clock.h
+++ b/arch/arm/include/asm/sched_clock.h
@@ -10,5 +10,7 @@
 
 extern void sched_clock_postinit(void);
 extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate);
+extern void setup_sched_clock_needs_suspend(u32 (*read)(void), int bits,
+		unsigned long rate);
 
 #endif
-- 
cgit 


From 47f1204329237a0f8655f5a9f14a38ac81946ca1 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 10 Aug 2012 17:51:18 +0100
Subject: ARM: 7487/1: mm: avoid setting nG bit for user mappings that aren't
 present

Swap entries are encoding in ptes such that !pte_present(pte) and
pte_file(pte). The remaining bits of the descriptor are used to identify
the swapfile and offset within it to the swap entry.

When writing such a pte for a user virtual address, set_pte_at
unconditionally sets the nG bit, which (in the case of LPAE) will
corrupt the swapfile offset and lead to a BUG:

[  140.494067] swap_free: Unused swap offset entry 000763b4
[  140.509989] BUG: Bad page map in process rs:main Q:Reg  pte:0ec76800 pmd:8f92e003

This patch fixes the problem by only setting the nG bit for user
mappings that are actually present.

Cc: <stable@vger.kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/pgtable.h | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index f66626d71e7d..d88f9f049e95 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -195,6 +195,18 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 
 #define pte_clear(mm,addr,ptep)	set_pte_ext(ptep, __pte(0), 0)
 
+#define pte_none(pte)		(!pte_val(pte))
+#define pte_present(pte)	(pte_val(pte) & L_PTE_PRESENT)
+#define pte_write(pte)		(!(pte_val(pte) & L_PTE_RDONLY))
+#define pte_dirty(pte)		(pte_val(pte) & L_PTE_DIRTY)
+#define pte_young(pte)		(pte_val(pte) & L_PTE_YOUNG)
+#define pte_exec(pte)		(!(pte_val(pte) & L_PTE_XN))
+#define pte_special(pte)	(0)
+
+#define pte_present_user(pte) \
+	((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \
+	 (L_PTE_PRESENT | L_PTE_USER))
+
 #if __LINUX_ARM_ARCH__ < 6
 static inline void __sync_icache_dcache(pte_t pteval)
 {
@@ -206,25 +218,15 @@ extern void __sync_icache_dcache(pte_t pteval);
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pteval)
 {
-	if (addr >= TASK_SIZE)
-		set_pte_ext(ptep, pteval, 0);
-	else {
+	unsigned long ext = 0;
+
+	if (addr < TASK_SIZE && pte_present_user(pteval)) {
 		__sync_icache_dcache(pteval);
-		set_pte_ext(ptep, pteval, PTE_EXT_NG);
+		ext |= PTE_EXT_NG;
 	}
-}
 
-#define pte_none(pte)		(!pte_val(pte))
-#define pte_present(pte)	(pte_val(pte) & L_PTE_PRESENT)
-#define pte_write(pte)		(!(pte_val(pte) & L_PTE_RDONLY))
-#define pte_dirty(pte)		(pte_val(pte) & L_PTE_DIRTY)
-#define pte_young(pte)		(pte_val(pte) & L_PTE_YOUNG)
-#define pte_exec(pte)		(!(pte_val(pte) & L_PTE_XN))
-#define pte_special(pte)	(0)
-
-#define pte_present_user(pte) \
-	((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \
-	 (L_PTE_PRESENT | L_PTE_USER))
+	set_pte_ext(ptep, pteval, ext);
+}
 
 #define PTE_BIT_FUNC(fn,op) \
 static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
-- 
cgit 


From f5f2025ef3e2cdb593707cbf87378761f17befbe Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 10 Aug 2012 17:51:19 +0100
Subject: ARM: 7488/1: mm: use 5 bits for swapfile type encoding

Page migration encodes the pfn in the offset field of a swp_entry_t.
For LPAE, we support physical addresses of up to 36 bits (due to
sparsemem limitations with the size of page flags), requiring 24 bits
to represent a pfn. A further 3 bits are used to encode a swp_entry into
a pte, leaving 5 bits for the type field. Furthermore, the core code
defines MAX_SWAPFILES_SHIFT as 5, so the additional type bit does not
get used.

This patch reduces the width of the type field to 5 bits, allowing us
to create up to 31 swapfiles of 64GB each.

Cc: <stable@vger.kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/pgtable.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/arm/include')

diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index d88f9f049e95..41dc31f834c3 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -253,13 +253,13 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
  *
  *   3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
  *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *   <--------------- offset --------------------> <- type --> 0 0 0
+ *   <--------------- offset ----------------------> < type -> 0 0 0
  *
- * This gives us up to 63 swap files and 32GB per swap file.  Note that
+ * This gives us up to 31 swap files and 64GB per swap file.  Note that
  * the offset field is always non-zero.
  */
 #define __SWP_TYPE_SHIFT	3
-#define __SWP_TYPE_BITS		6
+#define __SWP_TYPE_BITS		5
 #define __SWP_TYPE_MASK		((1 << __SWP_TYPE_BITS) - 1)
 #define __SWP_OFFSET_SHIFT	(__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
 
-- 
cgit