From 158e8bfe802f730f9ea7cde32eee8b43285bdd4a Mon Sep 17 00:00:00 2001 From: Alessandro Rubini Date: Sun, 24 Jun 2012 12:46:26 +0100 Subject: ARM: 7432/1: use the new linux/sizes.h Signed-off-by: Alessandro Rubini Acked-by: Giancarlo Asnaghi Acked-by: Linus Walleij Cc: Alan Cox Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index fcb575747e5e..e965f1b560f1 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include #ifdef CONFIG_NEED_MACH_MEMORY_H #include -- cgit From bc89663aa5c7ca620f58c34ab531ca409119becc Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 28 Jun 2012 14:42:08 +0800 Subject: ARM: fiq: change FIQ_START to a variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit a2be01b (ARM: only include mach/irqs.h for !SPARSE_IRQ) makes mach/irqs.h only be included for !SPARSE_IRQ build. There are a nubmer of platforms have FIQ_START defined in mach/irqs.h for FIQ support. arch/arm/mach-rpc/include/mach/irqs.h:#define FIQ_START 64 arch/arm/mach-s3c24xx/include/mach/irqs.h:#define FIQ_START IRQ_EINT0 arch/arm/plat-mxc/include/mach/irqs.h:#define FIQ_START 0 If SPARSE_IRQ is enabled for any of these platforms, the following compile error will be seen. arch/arm/kernel/fiq.c: In function ‘enable_fiq’: arch/arm/kernel/fiq.c:127:19: error: ‘FIQ_START’ undeclared (first use in this function) arch/arm/kernel/fiq.c:127:19: note: each undeclared identifier is reported only once for each function it appears in arch/arm/kernel/fiq.c: In function ‘disable_fiq’: arch/arm/kernel/fiq.c:132:20: error: ‘FIQ_START’ undeclared (first use in this function) The patch changes fiq code to have init_FIQ take FIQ_START from platforms as a parameter and assign it to variable fiq_start which is to replace FIQ_START uses in enable_fiq/disable_fiq. Signed-off-by: Shawn Guo Cc: Kukjin Kim Cc: Sascha Hauer Cc: Rob Herring Acked-by: Russell King --- arch/arm/include/asm/mach/irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/mach/irq.h b/arch/arm/include/asm/mach/irq.h index febe495d0c6e..15cb035309f7 100644 --- a/arch/arm/include/asm/mach/irq.h +++ b/arch/arm/include/asm/mach/irq.h @@ -17,7 +17,7 @@ struct seq_file; /* * This is internal. Do not use it. */ -extern void init_FIQ(void); +extern void init_FIQ(int); extern int show_fiq_list(struct seq_file *, int); #ifdef CONFIG_MULTI_IRQ_HANDLER -- cgit From a5203c4ce6750730b1d95a8bc1e8214765450f7e Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Sun, 8 Jul 2012 22:51:44 +0100 Subject: ARM: 7460/1: remove asm/locks.h Commit 64ac24e738823161693bf791f87adc802cf529ff ("Generic semaphore implementation") removed the last include of this header. Apparently it was just an oversight to keep this header. It can safely be removed now. Acked-by: Will Deacon Signed-off-by: Paul Bolle Signed-off-by: Russell King --- arch/arm/include/asm/locks.h | 274 ------------------------------------------- 1 file changed, 274 deletions(-) delete mode 100644 arch/arm/include/asm/locks.h (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/locks.h b/arch/arm/include/asm/locks.h deleted file mode 100644 index ef4c897772d1..000000000000 --- a/arch/arm/include/asm/locks.h +++ /dev/null @@ -1,274 +0,0 @@ -/* - * arch/arm/include/asm/locks.h - * - * Copyright (C) 2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Interrupt safe locking assembler. - */ -#ifndef __ASM_PROC_LOCKS_H -#define __ASM_PROC_LOCKS_H - -#if __LINUX_ARM_ARCH__ >= 6 - -#define __down_op(ptr,fail) \ - ({ \ - __asm__ __volatile__( \ - "@ down_op\n" \ -"1: ldrex lr, [%0]\n" \ -" sub lr, lr, %1\n" \ -" strex ip, lr, [%0]\n" \ -" teq ip, #0\n" \ -" bne 1b\n" \ -" teq lr, #0\n" \ -" movmi ip, %0\n" \ -" blmi " #fail \ - : \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - }) - -#define __down_op_ret(ptr,fail) \ - ({ \ - unsigned int ret; \ - __asm__ __volatile__( \ - "@ down_op_ret\n" \ -"1: ldrex lr, [%1]\n" \ -" sub lr, lr, %2\n" \ -" strex ip, lr, [%1]\n" \ -" teq ip, #0\n" \ -" bne 1b\n" \ -" teq lr, #0\n" \ -" movmi ip, %1\n" \ -" movpl ip, #0\n" \ -" blmi " #fail "\n" \ -" mov %0, ip" \ - : "=&r" (ret) \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - ret; \ - }) - -#define __up_op(ptr,wake) \ - ({ \ - smp_mb(); \ - __asm__ __volatile__( \ - "@ up_op\n" \ -"1: ldrex lr, [%0]\n" \ -" add lr, lr, %1\n" \ -" strex ip, lr, [%0]\n" \ -" teq ip, #0\n" \ -" bne 1b\n" \ -" cmp lr, #0\n" \ -" movle ip, %0\n" \ -" blle " #wake \ - : \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - }) - -/* - * The value 0x01000000 supports up to 128 processors and - * lots of processes. BIAS must be chosen such that sub'ing - * BIAS once per CPU will result in the long remaining - * negative. - */ -#define RW_LOCK_BIAS 0x01000000 -#define RW_LOCK_BIAS_STR "0x01000000" - -#define __down_op_write(ptr,fail) \ - ({ \ - __asm__ __volatile__( \ - "@ down_op_write\n" \ -"1: ldrex lr, [%0]\n" \ -" sub lr, lr, %1\n" \ -" strex ip, lr, [%0]\n" \ -" teq ip, #0\n" \ -" bne 1b\n" \ -" teq lr, #0\n" \ -" movne ip, %0\n" \ -" blne " #fail \ - : \ - : "r" (ptr), "I" (RW_LOCK_BIAS) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - }) - -#define __up_op_write(ptr,wake) \ - ({ \ - smp_mb(); \ - __asm__ __volatile__( \ - "@ up_op_write\n" \ -"1: ldrex lr, [%0]\n" \ -" adds lr, lr, %1\n" \ -" strex ip, lr, [%0]\n" \ -" teq ip, #0\n" \ -" bne 1b\n" \ -" movcs ip, %0\n" \ -" blcs " #wake \ - : \ - : "r" (ptr), "I" (RW_LOCK_BIAS) \ - : "ip", "lr", "cc"); \ - }) - -#define __down_op_read(ptr,fail) \ - __down_op(ptr, fail) - -#define __up_op_read(ptr,wake) \ - ({ \ - smp_mb(); \ - __asm__ __volatile__( \ - "@ up_op_read\n" \ -"1: ldrex lr, [%0]\n" \ -" add lr, lr, %1\n" \ -" strex ip, lr, [%0]\n" \ -" teq ip, #0\n" \ -" bne 1b\n" \ -" teq lr, #0\n" \ -" moveq ip, %0\n" \ -" bleq " #wake \ - : \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - }) - -#else - -#define __down_op(ptr,fail) \ - ({ \ - __asm__ __volatile__( \ - "@ down_op\n" \ -" mrs ip, cpsr\n" \ -" orr lr, ip, #128\n" \ -" msr cpsr_c, lr\n" \ -" ldr lr, [%0]\n" \ -" subs lr, lr, %1\n" \ -" str lr, [%0]\n" \ -" msr cpsr_c, ip\n" \ -" movmi ip, %0\n" \ -" blmi " #fail \ - : \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - }) - -#define __down_op_ret(ptr,fail) \ - ({ \ - unsigned int ret; \ - __asm__ __volatile__( \ - "@ down_op_ret\n" \ -" mrs ip, cpsr\n" \ -" orr lr, ip, #128\n" \ -" msr cpsr_c, lr\n" \ -" ldr lr, [%1]\n" \ -" subs lr, lr, %2\n" \ -" str lr, [%1]\n" \ -" msr cpsr_c, ip\n" \ -" movmi ip, %1\n" \ -" movpl ip, #0\n" \ -" blmi " #fail "\n" \ -" mov %0, ip" \ - : "=&r" (ret) \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - ret; \ - }) - -#define __up_op(ptr,wake) \ - ({ \ - smp_mb(); \ - __asm__ __volatile__( \ - "@ up_op\n" \ -" mrs ip, cpsr\n" \ -" orr lr, ip, #128\n" \ -" msr cpsr_c, lr\n" \ -" ldr lr, [%0]\n" \ -" adds lr, lr, %1\n" \ -" str lr, [%0]\n" \ -" msr cpsr_c, ip\n" \ -" movle ip, %0\n" \ -" blle " #wake \ - : \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - }) - -/* - * The value 0x01000000 supports up to 128 processors and - * lots of processes. BIAS must be chosen such that sub'ing - * BIAS once per CPU will result in the long remaining - * negative. - */ -#define RW_LOCK_BIAS 0x01000000 -#define RW_LOCK_BIAS_STR "0x01000000" - -#define __down_op_write(ptr,fail) \ - ({ \ - __asm__ __volatile__( \ - "@ down_op_write\n" \ -" mrs ip, cpsr\n" \ -" orr lr, ip, #128\n" \ -" msr cpsr_c, lr\n" \ -" ldr lr, [%0]\n" \ -" subs lr, lr, %1\n" \ -" str lr, [%0]\n" \ -" msr cpsr_c, ip\n" \ -" movne ip, %0\n" \ -" blne " #fail \ - : \ - : "r" (ptr), "I" (RW_LOCK_BIAS) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - }) - -#define __up_op_write(ptr,wake) \ - ({ \ - __asm__ __volatile__( \ - "@ up_op_write\n" \ -" mrs ip, cpsr\n" \ -" orr lr, ip, #128\n" \ -" msr cpsr_c, lr\n" \ -" ldr lr, [%0]\n" \ -" adds lr, lr, %1\n" \ -" str lr, [%0]\n" \ -" msr cpsr_c, ip\n" \ -" movcs ip, %0\n" \ -" blcs " #wake \ - : \ - : "r" (ptr), "I" (RW_LOCK_BIAS) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - }) - -#define __down_op_read(ptr,fail) \ - __down_op(ptr, fail) - -#define __up_op_read(ptr,wake) \ - ({ \ - smp_mb(); \ - __asm__ __volatile__( \ - "@ up_op_read\n" \ -" mrs ip, cpsr\n" \ -" orr lr, ip, #128\n" \ -" msr cpsr_c, lr\n" \ -" ldr lr, [%0]\n" \ -" adds lr, lr, %1\n" \ -" str lr, [%0]\n" \ -" msr cpsr_c, ip\n" \ -" moveq ip, %0\n" \ -" bleq " #wake \ - : \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - }) - -#endif - -#endif -- cgit From 546c2896a42202dbc7d02f7c6ec9948ac1bf511b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:43:41 +0100 Subject: ARM: 7446/1: spinlock: use ticket algorithm for ARMv6+ locking implementation Ticket spinlocks ensure locking fairness by introducing a FIFO-like nature to the granting of lock acquisitions and also reducing the thundering herd effect when spinning on a lock by allowing the cacheline to remain in a shared state amongst the waiting CPUs. This is especially important on systems where memory-access times are not necessarily uniform when accessing the lock structure (for example, on a multi-cluster platform where the lock is allocated into L1 when a CPU releases it). This patch implements the ticket spinlock algorithm for ARM, replacing the simpler implementation for ARMv6+ processors. Reviewed-by: Nicolas Pitre Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/spinlock.h | 72 +++++++++++++++++++++++------------ arch/arm/include/asm/spinlock_types.h | 17 ++++++++- 2 files changed, 63 insertions(+), 26 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 65fa3c88095c..0da2effd4b37 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -59,18 +59,13 @@ static inline void dsb_sev(void) } /* - * ARMv6 Spin-locking. + * ARMv6 ticket-based spin-locking. * - * We exclusively read the old value. If it is zero, we may have - * won the lock, so we try exclusively storing it. A memory barrier - * is required after we get a lock, and before we release it, because - * V6 CPUs are assumed to have weakly ordered memory. - * - * Unlocked value: 0 - * Locked value: 1 + * A memory barrier is required after we get a lock, and before we + * release it, because V6 CPUs are assumed to have weakly ordered + * memory. */ -#define arch_spin_is_locked(x) ((x)->lock != 0) #define arch_spin_unlock_wait(lock) \ do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) @@ -79,31 +74,39 @@ static inline void dsb_sev(void) static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned long tmp; + u32 newval; + arch_spinlock_t lockval; __asm__ __volatile__( -"1: ldrex %0, [%1]\n" -" teq %0, #0\n" - WFE("ne") -" strexeq %0, %2, [%1]\n" -" teqeq %0, #0\n" +"1: ldrex %0, [%3]\n" +" add %1, %0, %4\n" +" strex %2, %1, [%3]\n" +" teq %2, #0\n" " bne 1b" - : "=&r" (tmp) - : "r" (&lock->lock), "r" (1) + : "=&r" (lockval), "=&r" (newval), "=&r" (tmp) + : "r" (&lock->slock), "I" (1 << TICKET_SHIFT) : "cc"); + while (lockval.tickets.next != lockval.tickets.owner) { + wfe(); + lockval.tickets.owner = ACCESS_ONCE(lock->tickets.owner); + } + smp_mb(); } static inline int arch_spin_trylock(arch_spinlock_t *lock) { unsigned long tmp; + u32 slock; __asm__ __volatile__( -" ldrex %0, [%1]\n" -" teq %0, #0\n" -" strexeq %0, %2, [%1]" - : "=&r" (tmp) - : "r" (&lock->lock), "r" (1) +" ldrex %0, [%2]\n" +" subs %1, %0, %0, ror #16\n" +" addeq %0, %0, %3\n" +" strexeq %1, %0, [%2]" + : "=&r" (slock), "=&r" (tmp) + : "r" (&lock->slock), "I" (1 << TICKET_SHIFT) : "cc"); if (tmp == 0) { @@ -116,17 +119,38 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) static inline void arch_spin_unlock(arch_spinlock_t *lock) { + unsigned long tmp; + u32 slock; + smp_mb(); __asm__ __volatile__( -" str %1, [%0]\n" - : - : "r" (&lock->lock), "r" (0) +" mov %1, #1\n" +"1: ldrex %0, [%2]\n" +" uadd16 %0, %0, %1\n" +" strex %1, %0, [%2]\n" +" teq %1, #0\n" +" bne 1b" + : "=&r" (slock), "=&r" (tmp) + : "r" (&lock->slock) : "cc"); dsb_sev(); } +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets); + return tickets.owner != tickets.next; +} + +static inline int arch_spin_is_contended(arch_spinlock_t *lock) +{ + struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets); + return (tickets.next - tickets.owner) > 1; +} +#define arch_spin_is_contended arch_spin_is_contended + /* * RWLOCKS * diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h index d14d197ae04a..b262d2f8b478 100644 --- a/arch/arm/include/asm/spinlock_types.h +++ b/arch/arm/include/asm/spinlock_types.h @@ -5,11 +5,24 @@ # error "please don't include this file directly" #endif +#define TICKET_SHIFT 16 + typedef struct { - volatile unsigned int lock; + union { + u32 slock; + struct __raw_tickets { +#ifdef __ARMEB__ + u16 next; + u16 owner; +#else + u16 owner; + u16 next; +#endif + } tickets; + }; } arch_spinlock_t; -#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } typedef struct { volatile unsigned int lock; -- cgit From 881ccccb6bd3f0e1fff8b9addbe0de90e0b16166 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:44:13 +0100 Subject: ARM: 7447/1: rwlocks: remove unused branch labels from trylock routines The ARM arch_{read,write}_trylock implementations include unused backwards branch labels, since we don't retry the locking operation if the exclusive store fails. This patch removes the labels. Acked-by: Nicolas Pitre Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/spinlock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 0da2effd4b37..b4ca707d0a69 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -182,7 +182,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) unsigned long tmp; __asm__ __volatile__( -"1: ldrex %0, [%1]\n" +" ldrex %0, [%1]\n" " teq %0, #0\n" " strexeq %0, %2, [%1]" : "=&r" (tmp) @@ -268,7 +268,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) unsigned long tmp, tmp2 = 1; __asm__ __volatile__( -"1: ldrex %0, [%2]\n" +" ldrex %0, [%2]\n" " adds %0, %0, #1\n" " strexpl %1, %0, [%2]\n" : "=&r" (tmp), "+r" (tmp2) -- cgit From 4295b898f5a5c7e62ae68e7a4ecc4b414622ffe6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:45:00 +0100 Subject: ARM: 7448/1: perf: remove arm_perf_pmu_ids global enumeration In order to provide PMU name strings compatible with the OProfile user ABI, an enumeration of all PMUs is currently used by perf to identify each PMU uniquely. Unfortunately, this does not scale well in the presence of multiple PMUs and creates a single, global namespace across all PMUs in the system. This patch removes the enumeration and instead uses the name string for the PMU to map onto the OProfile variant. perf_pmu_name is implemented for CPU PMUs, which is all that OProfile cares about anyway. Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/perf_event.h | 17 +---------------- arch/arm/include/asm/pmu.h | 3 +-- 2 files changed, 2 insertions(+), 18 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index 00cbe10a50e3..e074948d8143 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -12,21 +12,6 @@ #ifndef __ARM_PERF_EVENT_H__ #define __ARM_PERF_EVENT_H__ -/* ARM perf PMU IDs for use by internal perf clients. */ -enum arm_perf_pmu_ids { - ARM_PERF_PMU_ID_XSCALE1 = 0, - ARM_PERF_PMU_ID_XSCALE2, - ARM_PERF_PMU_ID_V6, - ARM_PERF_PMU_ID_V6MP, - ARM_PERF_PMU_ID_CA8, - ARM_PERF_PMU_ID_CA9, - ARM_PERF_PMU_ID_CA5, - ARM_PERF_PMU_ID_CA15, - ARM_PERF_PMU_ID_CA7, - ARM_NUM_PMU_IDS, -}; - -extern enum arm_perf_pmu_ids -armpmu_get_pmu_id(void); +/* Nothing to see here... */ #endif /* __ARM_PERF_EVENT_H__ */ diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index 90114faa9f3c..4432305f4a2a 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -103,10 +103,9 @@ struct pmu_hw_events { struct arm_pmu { struct pmu pmu; - enum arm_perf_pmu_ids id; enum arm_pmu_type type; cpumask_t active_irqs; - const char *name; + char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct hw_perf_event *evt, int idx); void (*disable)(struct hw_perf_event *evt, int idx); -- cgit From 8c56cc8be5b38e3684eba96dc9b3f7ca7e495755 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:45:39 +0100 Subject: ARM: 7449/1: use generic strnlen_user and strncpy_from_user functions This patch implements the word-at-a-time interface for ARM using the same algorithm as x86. We use the fls macro from ARMv5 onwards, where we have a clz instruction available which saves us a mov instruction when targetting Thumb-2. For older CPUs, we use the magic 0x0ff0001 constant. Big-endian configurations make use of the implementation from asm-generic. With this implemented, we can replace our byte-at-a-time strnlen_user and strncpy_from_user functions with the optimised generic versions. Reviewed-by: Nicolas Pitre Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/uaccess.h | 27 ++++------------- arch/arm/include/asm/word-at-a-time.h | 55 +++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 21 deletions(-) create mode 100644 arch/arm/include/asm/word-at-a-time.h (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 71f6536d17ac..479a6352e0b5 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -189,6 +189,9 @@ static inline void set_fs(mm_segment_t fs) #define access_ok(type,addr,size) (__range_ok(addr,size) == 0) +#define user_addr_max() \ + (segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL) + /* * The "__xxx" versions of the user access functions do not verify the * address space - it must have been done previously with a separate @@ -398,9 +401,6 @@ extern unsigned long __must_check __clear_user_std(void __user *addr, unsigned l #define __clear_user(addr,n) (memset((void __force *)addr, 0, n), 0) #endif -extern unsigned long __must_check __strncpy_from_user(char *to, const char __user *from, unsigned long count); -extern unsigned long __must_check __strnlen_user(const char __user *s, long n); - static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { if (access_ok(VERIFY_READ, from, n)) @@ -427,24 +427,9 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo return n; } -static inline long __must_check strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) - res = __strncpy_from_user(dst, src, count); - return res; -} - -#define strlen_user(s) strnlen_user(s, ~0UL >> 1) +extern long strncpy_from_user(char *dest, const char __user *src, long count); -static inline long __must_check strnlen_user(const char __user *s, long n) -{ - unsigned long res = 0; - - if (__addr_ok(s)) - res = __strnlen_user(s, n); - - return res; -} +extern __must_check long strlen_user(const char __user *str); +extern __must_check long strnlen_user(const char __user *str, long n); #endif /* _ASMARM_UACCESS_H */ diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h new file mode 100644 index 000000000000..74b2d4578577 --- /dev/null +++ b/arch/arm/include/asm/word-at-a-time.h @@ -0,0 +1,55 @@ +#ifndef __ASM_ARM_WORD_AT_A_TIME_H +#define __ASM_ARM_WORD_AT_A_TIME_H + +#ifndef __ARMEB__ + +/* + * Little-endian word-at-a-time zero byte handling. + * Heavily based on the x86 algorithm. + */ +#include + +struct word_at_a_time { + const unsigned long one_bits, high_bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } + +static inline unsigned long has_zero(unsigned long a, unsigned long *bits, + const struct word_at_a_time *c) +{ + unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits; + *bits = mask; + return mask; +} + +#define prep_zero_mask(a, bits, c) (bits) + +static inline unsigned long create_zero_mask(unsigned long bits) +{ + bits = (bits - 1) & ~bits; + return bits >> 7; +} + +static inline unsigned long find_zero(unsigned long mask) +{ + unsigned long ret; + +#if __LINUX_ARM_ARCH__ >= 5 + /* We have clz available. */ + ret = fls(mask) >> 3; +#else + /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ + ret = (0x0ff0001 + mask) >> 23; + /* Fix the 1 for 00 case */ + ret &= mask; +#endif + + return ret; +} + +#else /* __ARMEB__ */ +#include +#endif + +#endif /* __ASM_ARM_WORD_AT_A_TIME_H */ -- cgit From b9a50f74905ad9126c91b495ece8a5f45434c643 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:46:08 +0100 Subject: ARM: 7450/1: dcache: select DCACHE_WORD_ACCESS for little-endian ARMv6+ CPUs DCACHE_WORD_ACCESS uses the word-at-a-time API for optimised string comparisons in the vfs layer. This patch implements support for load_unaligned_zeropad for ARM CPUs with native support for unaligned memory accesses (v6+) when running little-endian. Reviewed-by: Nicolas Pitre Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/word-at-a-time.h | 41 +++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h index 74b2d4578577..4d52f92967a6 100644 --- a/arch/arm/include/asm/word-at-a-time.h +++ b/arch/arm/include/asm/word-at-a-time.h @@ -48,6 +48,47 @@ static inline unsigned long find_zero(unsigned long mask) return ret; } +#ifdef CONFIG_DCACHE_WORD_ACCESS + +#define zero_bytemask(mask) (mask) + +/* + * Load an unaligned word from kernel space. + * + * In the (very unlikely) case of the word being a page-crosser + * and the next page not being mapped, take the exception and + * return zeroes in the non-existing part. + */ +static inline unsigned long load_unaligned_zeropad(const void *addr) +{ + unsigned long ret, offset; + + /* Load word from unaligned pointer addr */ + asm( + "1: ldr %0, [%2]\n" + "2:\n" + " .pushsection .fixup,\"ax\"\n" + " .align 2\n" + "3: and %1, %2, #0x3\n" + " bic %2, %2, #0x3\n" + " ldr %0, [%2]\n" + " lsl %1, %1, #0x3\n" + " lsr %0, %0, %1\n" + " b 2b\n" + " .popsection\n" + " .pushsection __ex_table,\"a\"\n" + " .align 3\n" + " .long 1b, 3b\n" + " .popsection" + : "=&r" (ret), "=&r" (offset) + : "r" (addr), "Qo" (*(unsigned long *)addr)); + + return ret; +} + + +#endif /* DCACHE_WORD_ACCESS */ + #else /* __ARMEB__ */ #include #endif -- cgit From 923df96b9f31b7d08d8438ff9677326d9537accf Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:46:45 +0100 Subject: ARM: 7451/1: arch timer: implement read_current_timer and get_cycles This patch implements read_current_timer using the architected timers when they are selected via CONFIG_ARM_ARCH_TIMER. If they are detected not to be usable at runtime, we return -ENXIO to the caller. Furthermore, if read_current_timer is exported then we can implement get_cycles in terms of it for use as both an entropy source and for implementing __udelay and friends. Tested-by: Shinya Kuribayashi Reviewed-by: Stephen Boyd Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/arch_timer.h | 3 +++ arch/arm/include/asm/timex.h | 10 ++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index ed2e95d46e29..62e75475e57e 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -1,7 +1,10 @@ #ifndef __ASMARM_ARCH_TIMER_H #define __ASMARM_ARCH_TIMER_H +#include + #ifdef CONFIG_ARM_ARCH_TIMER +#define ARCH_HAS_READ_CURRENT_TIMER int arch_timer_of_register(void); int arch_timer_sched_clock_init(void); #else diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h index 3be8de3adaba..ce119442277c 100644 --- a/arch/arm/include/asm/timex.h +++ b/arch/arm/include/asm/timex.h @@ -12,13 +12,15 @@ #ifndef _ASMARM_TIMEX_H #define _ASMARM_TIMEX_H +#include #include typedef unsigned long cycles_t; -static inline cycles_t get_cycles (void) -{ - return 0; -} +#ifdef ARCH_HAS_READ_CURRENT_TIMER +#define get_cycles() ({ cycles_t c; read_current_timer(&c) ? 0 : c; }) +#else +#define get_cycles() (0) +#endif #endif -- cgit From d0a533b18235d36206b9b422efadb7cee444dfdb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:47:17 +0100 Subject: ARM: 7452/1: delay: allow timer-based delay implementation to be selected This patch allows a timer-based delay implementation to be selected by switching the delay routines over to use get_cycles, which is implemented in terms of read_current_timer. This further allows us to skip the loop calibration and have a consistent delay function in the face of core frequency scaling. To avoid the pain of dealing with memory-mapped counters, this implementation uses the co-processor interface to the architected timers when they are available. The previous loop-based implementation is kept around for CPUs without the architected timers and we retain both the maximum delay (2ms) and the corresponding conversion factors for determining the number of loops required for a given interval. Since the indirection of the timer routines will only work when called from C, the sa1100 sleep routines are modified to branch to the loop-based delay functions directly. Tested-by: Shinya Kuribayashi Reviewed-by: Stephen Boyd Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/delay.h | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/delay.h b/arch/arm/include/asm/delay.h index b2deda181549..dc6145120de3 100644 --- a/arch/arm/include/asm/delay.h +++ b/arch/arm/include/asm/delay.h @@ -6,9 +6,22 @@ #ifndef __ASM_ARM_DELAY_H #define __ASM_ARM_DELAY_H +#include #include /* HZ */ -extern void __delay(int loops); +#define MAX_UDELAY_MS 2 +#define UDELAY_MULT ((UL(2199023) * HZ) >> 11) +#define UDELAY_SHIFT 30 + +#ifndef __ASSEMBLY__ + +extern struct arm_delay_ops { + void (*delay)(unsigned long); + void (*const_udelay)(unsigned long); + void (*udelay)(unsigned long); +} arm_delay_ops; + +#define __delay(n) arm_delay_ops.delay(n) /* * This function intentionally does not exist; if you see references to @@ -23,22 +36,27 @@ extern void __bad_udelay(void); * division by multiplication: you don't have to worry about * loss of precision. * - * Use only for very small delays ( < 1 msec). Should probably use a + * Use only for very small delays ( < 2 msec). Should probably use a * lookup table, really, as the multiplications take much too long with * short delays. This is a "reasonable" implementation, though (and the * first constant multiplications gets optimized away if the delay is * a constant) */ -extern void __udelay(unsigned long usecs); -extern void __const_udelay(unsigned long); - -#define MAX_UDELAY_MS 2 +#define __udelay(n) arm_delay_ops.udelay(n) +#define __const_udelay(n) arm_delay_ops.const_udelay(n) #define udelay(n) \ (__builtin_constant_p(n) ? \ ((n) > (MAX_UDELAY_MS * 1000) ? __bad_udelay() : \ - __const_udelay((n) * ((2199023U*HZ)>>11))) : \ + __const_udelay((n) * UDELAY_MULT)) : \ __udelay(n)) +/* Loop-based definitions for assembly code. */ +extern void __loop_delay(unsigned long loops); +extern void __loop_udelay(unsigned long usecs); +extern void __loop_const_udelay(unsigned long); + +#endif /* __ASSEMBLY__ */ + #endif /* defined(_ARM_DELAY_H) */ -- cgit From 4b7a7d889007ec22d4df741cc08ef79583d11189 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 25 Nov 2011 22:53:10 +0800 Subject: arm: remove km_type definitions Signed-off-by: Cong Wang --- arch/arm/include/asm/kmap_types.h | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/kmap_types.h b/arch/arm/include/asm/kmap_types.h index e51b1e81df05..83eb2f772911 100644 --- a/arch/arm/include/asm/kmap_types.h +++ b/arch/arm/include/asm/kmap_types.h @@ -4,30 +4,6 @@ /* * This is the "bare minimum". AIO seems to require this. */ -enum km_type { - KM_BOUNCE_READ, - KM_SKB_SUNRPC_DATA, - KM_SKB_DATA_SOFTIRQ, - KM_USER0, - KM_USER1, - KM_BIO_SRC_IRQ, - KM_BIO_DST_IRQ, - KM_PTE0, - KM_PTE1, - KM_IRQ0, - KM_IRQ1, - KM_SOFTIRQ0, - KM_SOFTIRQ1, - KM_L1_CACHE, - KM_L2_CACHE, - KM_KDB, - KM_TYPE_NR -}; - -#ifdef CONFIG_DEBUG_HIGHMEM -#define KM_NMI (-1) -#define KM_NMI_PTE (-1) -#define KM_IRQ_PTE (-1) -#endif +#define KM_TYPE_NR 16 #endif -- cgit From a5d5f7daa744b34477c4a12728bde0a1694a1707 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 12 Jul 2012 23:57:35 +0100 Subject: ARM: 7465/1: Handle >4GB memory sizes in device tree and mem=size@start option The memory regions which are passed to arm_add_memory() from device tree blobs via early_init_dt_add_memory_arch() can have sizes which are larger than will fit in a 32 bit integer, so switch to using a phys_addr_t to hold them, to avoid silently dropping the top 32 bits of the size. Similarly, use phys_addr_t in early_mem() so that mem=size@start command line options specifying more than 4GB behave sensibly. Acked-by: Will Deacon Signed-off-by: Peter Maydell Signed-off-by: Russell King --- arch/arm/include/asm/setup.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h index 23ebc0c82a39..24d284a1bfc7 100644 --- a/arch/arm/include/asm/setup.h +++ b/arch/arm/include/asm/setup.h @@ -196,7 +196,7 @@ static const struct tagtable __tagtable_##fn __tag = { tag, fn } struct membank { phys_addr_t start; - unsigned long size; + phys_addr_t size; unsigned int highmem; }; @@ -217,7 +217,7 @@ extern struct meminfo meminfo; #define bank_phys_end(bank) ((bank)->start + (bank)->size) #define bank_phys_size(bank) (bank)->size -extern int arm_add_memory(phys_addr_t start, unsigned long size); +extern int arm_add_memory(phys_addr_t start, phys_addr_t size); extern void early_print(const char *str, ...); extern void dump_machine_table(void); -- cgit From e9da6e9905e639b0f842a244bc770b48ad0523e9 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 30 Jul 2012 09:11:33 +0200 Subject: ARM: dma-mapping: remove custom consistent dma region This patch changes dma-mapping subsystem to use generic vmalloc areas for all consistent dma allocations. This increases the total size limit of the consistent allocations and removes platform hacks and a lot of duplicated code. Atomic allocations are served from special pool preallocated on boot, because vmalloc areas cannot be reliably created in atomic context. Signed-off-by: Marek Szyprowski Reviewed-by: Kyungmin Park Reviewed-by: Minchan Kim --- arch/arm/include/asm/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index bbef15d04890..80777d871422 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -226,7 +226,7 @@ static inline int dma_mmap_writecombine(struct device *dev, struct vm_area_struc * DMA region above it's default value of 2MB. It must be called before the * memory allocator is initialised, i.e. before any core_initcall. */ -extern void __init init_consistent_dma_size(unsigned long size); +static inline void init_consistent_dma_size(unsigned long size) { } /* * For SA-1111, IXP425, and ADI systems the dma-mapping functions are "magic" -- cgit From 64ccc9c033c6089b2d426dad3c56477ab066c999 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 14 Jun 2012 13:03:04 +0200 Subject: common: dma-mapping: add support for generic dma_mmap_* calls Commit 9adc5374 ('common: dma-mapping: introduce mmap method') added a generic method for implementing mmap user call to dma_map_ops structure. This patch converts ARM and PowerPC architectures (the only providers of dma_mmap_coherent/dma_mmap_writecombine calls) to use this generic dma_map_ops based call and adds a generic cross architecture definition for dma_mmap_attrs, dma_mmap_coherent, dma_mmap_writecombine functions. The generic mmap virt_to_page-based fallback implementation is provided for architectures which don't provide their own implementation for mmap method. Signed-off-by: Marek Szyprowski Reviewed-by: Kyungmin Park --- arch/arm/include/asm/dma-mapping.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 80777d871422..a04803331144 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -186,17 +186,6 @@ extern int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs); -#define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, NULL) - -static inline int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, - size_t size, struct dma_attrs *attrs) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - BUG_ON(!ops); - return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); -} - static inline void *dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { @@ -213,14 +202,6 @@ static inline void dma_free_writecombine(struct device *dev, size_t size, return dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs); } -static inline int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size) -{ - DEFINE_DMA_ATTRS(attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); - return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs); -} - /* * This can be called during boot to increase the size of the consistent * DMA region above it's default value of 2MB. It must be called before the -- cgit From dc2832e1e7db3f9ad465d2fe894bd69ef05d1e4b Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 13 Jun 2012 10:01:15 +0200 Subject: ARM: dma-mapping: add support for dma_get_sgtable() This patch adds support for dma_get_sgtable() function which is required to let drivers to share the buffers allocated by DMA-mapping subsystem. Generic implementation based on virt_to_page() is not suitable for ARM dma-mapping subsystem. Signed-off-by: Marek Szyprowski Reviewed-by: Kyungmin Park --- arch/arm/include/asm/dma-mapping.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index a04803331144..2ae842df4551 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -261,6 +261,9 @@ extern void arm_dma_sync_sg_for_cpu(struct device *, struct scatterlist *, int, enum dma_data_direction); extern void arm_dma_sync_sg_for_device(struct device *, struct scatterlist *, int, enum dma_data_direction); +extern int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs); #endif /* __KERNEL__ */ #endif -- cgit From c1d7e01d7877a397655277a920aeaa3830ed9461 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 30 Jul 2012 14:42:46 -0700 Subject: ipc: use Kconfig options for __ARCH_WANT_[COMPAT_]IPC_PARSE_VERSION Rather than #define the options manually in the architecture code, add Kconfig options for them and select them there instead. This also allows us to select the compat IPC version parsing automatically for platforms using the old compat IPC interface. Reported-by: Andrew Morton Signed-off-by: Will Deacon Cc: Arnd Bergmann Cc: Chris Metcalf Cc: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/unistd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 512cd1473454..0cab47d4a83f 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -446,7 +446,6 @@ #ifdef __KERNEL__ -#define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_GETHOSTNAME #define __ARCH_WANT_SYS_PAUSE -- cgit From a76d7bd96d65fa5119adba97e1b58d95f2e78829 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Jul 2012 19:15:40 +0100 Subject: ARM: 7467/1: mutex: use generic xchg-based implementation for ARMv6+ The open-coded mutex implementation for ARMv6+ cores suffers from a severe lack of barriers, so in the uncontended case we don't actually protect any accesses performed during the critical section. Furthermore, the code is largely a duplication of the ARMv6+ atomic_dec code but optimised to remove a branch instruction, as the mutex fastpath was previously inlined. Now that this is executed out-of-line, we can reuse the atomic access code for the locking (in fact, we use the xchg code as this produces shorter critical sections). This patch uses the generic xchg based implementation for mutexes on ARMv6+, which introduces barriers to the lock/unlock operations and also has the benefit of removing a fair amount of inline assembly code. Cc: Acked-by: Arnd Bergmann Acked-by: Nicolas Pitre Reported-by: Shan Kang Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/mutex.h | 119 ++----------------------------------------- 1 file changed, 4 insertions(+), 115 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/mutex.h b/arch/arm/include/asm/mutex.h index 93226cf23ae0..b1479fd04a95 100644 --- a/arch/arm/include/asm/mutex.h +++ b/arch/arm/include/asm/mutex.h @@ -7,121 +7,10 @@ */ #ifndef _ASM_MUTEX_H #define _ASM_MUTEX_H - -#if __LINUX_ARM_ARCH__ < 6 -/* On pre-ARMv6 hardware the swp based implementation is the most efficient. */ -# include -#else - /* - * Attempting to lock a mutex on ARMv6+ can be done with a bastardized - * atomic decrement (it is not a reliable atomic decrement but it satisfies - * the defined semantics for our purpose, while being smaller and faster - * than a real atomic decrement or atomic swap. The idea is to attempt - * decrementing the lock value only once. If once decremented it isn't zero, - * or if its store-back fails due to a dispute on the exclusive store, we - * simply bail out immediately through the slow path where the lock will be - * reattempted until it succeeds. + * On pre-ARMv6 hardware this results in a swp-based implementation, + * which is the most efficient. For ARMv6+, we emit a pair of exclusive + * accesses instead. */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - int __ex_flag, __res; - - __asm__ ( - - "ldrex %0, [%2] \n\t" - "sub %0, %0, #1 \n\t" - "strex %1, %0, [%2] " - - : "=&r" (__res), "=&r" (__ex_flag) - : "r" (&(count)->counter) - : "cc","memory" ); - - __res |= __ex_flag; - if (unlikely(__res != 0)) - fail_fn(count); -} - -static inline int -__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - int __ex_flag, __res; - - __asm__ ( - - "ldrex %0, [%2] \n\t" - "sub %0, %0, #1 \n\t" - "strex %1, %0, [%2] " - - : "=&r" (__res), "=&r" (__ex_flag) - : "r" (&(count)->counter) - : "cc","memory" ); - - __res |= __ex_flag; - if (unlikely(__res != 0)) - __res = fail_fn(count); - return __res; -} - -/* - * Same trick is used for the unlock fast path. However the original value, - * rather than the result, is used to test for success in order to have - * better generated assembly. - */ -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - int __ex_flag, __res, __orig; - - __asm__ ( - - "ldrex %0, [%3] \n\t" - "add %1, %0, #1 \n\t" - "strex %2, %1, [%3] " - - : "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag) - : "r" (&(count)->counter) - : "cc","memory" ); - - __orig |= __ex_flag; - if (unlikely(__orig != 0)) - fail_fn(count); -} - -/* - * If the unlock was done on a contended lock, or if the unlock simply fails - * then the mutex remains locked. - */ -#define __mutex_slowpath_needs_to_unlock() 1 - -/* - * For __mutex_fastpath_trylock we use another construct which could be - * described as a "single value cmpxchg". - * - * This provides the needed trylock semantics like cmpxchg would, but it is - * lighter and less generic than a true cmpxchg implementation. - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - int __ex_flag, __res, __orig; - - __asm__ ( - - "1: ldrex %0, [%3] \n\t" - "subs %1, %0, #1 \n\t" - "strexeq %2, %1, [%3] \n\t" - "movlt %0, #0 \n\t" - "cmpeq %2, #0 \n\t" - "bgt 1b " - - : "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag) - : "r" (&count->counter) - : "cc", "memory" ); - - return __orig; -} - -#endif +#include #endif -- cgit From b74253f78400f9a4b42da84bb1de7540b88ce7c4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 23 Jul 2012 14:18:13 +0100 Subject: ARM: 7479/1: mm: avoid NULL dereference when flushing gate_vma with VIVT caches The vivt_flush_cache_{range,page} functions check that the mm_struct of the VMA being flushed has been active on the current CPU before performing the cache maintenance. The gate_vma has a NULL mm_struct pointer and, as such, will cause a kernel fault if we try to flush it with the above operations. This happens during ELF core dumps, which include the gate_vma as it may be useful for debugging purposes. This patch adds checks to the VIVT cache flushing functions so that VMAs with a NULL mm_struct are flushed unconditionally (the vectors page may be dirty if we use it to store the current TLS pointer). Cc: # 3.4+ Reported-by: Gilles Chanteperdrix Tested-by: Uros Bizjak Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/cacheflush.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 004c1bc95d2b..e4448e16046d 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -215,7 +215,9 @@ static inline void vivt_flush_cache_mm(struct mm_struct *mm) static inline void vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) + struct mm_struct *mm = vma->vm_mm; + + if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) __cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end), vma->vm_flags); } @@ -223,7 +225,9 @@ vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned static inline void vivt_flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn) { - if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { + struct mm_struct *mm = vma->vm_mm; + + if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) { unsigned long addr = user_addr & PAGE_MASK; __cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags); } -- cgit From 237ec6f2e51d2fc2ff37c7c5f1ccc9264d09c85b Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Tue, 7 Aug 2012 19:05:10 +0100 Subject: ARM: 7486/1: sched_clock: update epoch_cyc on resume Many clocks that are used to provide sched_clock will reset during suspend. If read_sched_clock returns 0 after suspend, sched_clock will appear to jump forward. This patch resets cd.epoch_cyc to the current value of read_sched_clock during resume, which causes sched_clock() just after suspend to return the same value as sched_clock() just before suspend. In addition, during the window where epoch_ns has been updated before suspend, but epoch_cyc has not been updated after suspend, it is unknown whether the clock has reset or not, and sched_clock() could return a bogus value. Add a suspended flag, and return the pre-suspend epoch_ns value during this period. The new behavior is triggered by calling setup_sched_clock_needs_suspend instead of setup_sched_clock. Signed-off-by: Colin Cross Reviewed-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/include/asm/sched_clock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/sched_clock.h b/arch/arm/include/asm/sched_clock.h index e3f757263438..05b8e82ec9f5 100644 --- a/arch/arm/include/asm/sched_clock.h +++ b/arch/arm/include/asm/sched_clock.h @@ -10,5 +10,7 @@ extern void sched_clock_postinit(void); extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate); +extern void setup_sched_clock_needs_suspend(u32 (*read)(void), int bits, + unsigned long rate); #endif -- cgit From 47f1204329237a0f8655f5a9f14a38ac81946ca1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 10 Aug 2012 17:51:18 +0100 Subject: ARM: 7487/1: mm: avoid setting nG bit for user mappings that aren't present Swap entries are encoding in ptes such that !pte_present(pte) and pte_file(pte). The remaining bits of the descriptor are used to identify the swapfile and offset within it to the swap entry. When writing such a pte for a user virtual address, set_pte_at unconditionally sets the nG bit, which (in the case of LPAE) will corrupt the swapfile offset and lead to a BUG: [ 140.494067] swap_free: Unused swap offset entry 000763b4 [ 140.509989] BUG: Bad page map in process rs:main Q:Reg pte:0ec76800 pmd:8f92e003 This patch fixes the problem by only setting the nG bit for user mappings that are actually present. Cc: Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/pgtable.h | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index f66626d71e7d..d88f9f049e95 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -195,6 +195,18 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) #define pte_clear(mm,addr,ptep) set_pte_ext(ptep, __pte(0), 0) +#define pte_none(pte) (!pte_val(pte)) +#define pte_present(pte) (pte_val(pte) & L_PTE_PRESENT) +#define pte_write(pte) (!(pte_val(pte) & L_PTE_RDONLY)) +#define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY) +#define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG) +#define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN)) +#define pte_special(pte) (0) + +#define pte_present_user(pte) \ + ((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \ + (L_PTE_PRESENT | L_PTE_USER)) + #if __LINUX_ARM_ARCH__ < 6 static inline void __sync_icache_dcache(pte_t pteval) { @@ -206,25 +218,15 @@ extern void __sync_icache_dcache(pte_t pteval); static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval) { - if (addr >= TASK_SIZE) - set_pte_ext(ptep, pteval, 0); - else { + unsigned long ext = 0; + + if (addr < TASK_SIZE && pte_present_user(pteval)) { __sync_icache_dcache(pteval); - set_pte_ext(ptep, pteval, PTE_EXT_NG); + ext |= PTE_EXT_NG; } -} -#define pte_none(pte) (!pte_val(pte)) -#define pte_present(pte) (pte_val(pte) & L_PTE_PRESENT) -#define pte_write(pte) (!(pte_val(pte) & L_PTE_RDONLY)) -#define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY) -#define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG) -#define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN)) -#define pte_special(pte) (0) - -#define pte_present_user(pte) \ - ((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \ - (L_PTE_PRESENT | L_PTE_USER)) + set_pte_ext(ptep, pteval, ext); +} #define PTE_BIT_FUNC(fn,op) \ static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } -- cgit From f5f2025ef3e2cdb593707cbf87378761f17befbe Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 10 Aug 2012 17:51:19 +0100 Subject: ARM: 7488/1: mm: use 5 bits for swapfile type encoding Page migration encodes the pfn in the offset field of a swp_entry_t. For LPAE, we support physical addresses of up to 36 bits (due to sparsemem limitations with the size of page flags), requiring 24 bits to represent a pfn. A further 3 bits are used to encode a swp_entry into a pte, leaving 5 bits for the type field. Furthermore, the core code defines MAX_SWAPFILES_SHIFT as 5, so the additional type bit does not get used. This patch reduces the width of the type field to 5 bits, allowing us to create up to 31 swapfiles of 64GB each. Cc: Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/pgtable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index d88f9f049e95..41dc31f834c3 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -253,13 +253,13 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 - * <--------------- offset --------------------> <- type --> 0 0 0 + * <--------------- offset ----------------------> < type -> 0 0 0 * - * This gives us up to 63 swap files and 32GB per swap file. Note that + * This gives us up to 31 swap files and 64GB per swap file. Note that * the offset field is always non-zero. */ #define __SWP_TYPE_SHIFT 3 -#define __SWP_TYPE_BITS 6 +#define __SWP_TYPE_BITS 5 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) -- cgit