From 3fc24ef32d3b9368f4c103dcd21d6a3f959b4870 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Fri, 1 Jul 2022 15:53:22 +0200
Subject: arm64: compat: Implement misalignment fixups for multiword loads

The 32-bit ARM kernel implements fixups on behalf of user space when
using LDM/STM or LDRD/STRD instructions on addresses that are not 32-bit
aligned. This is not something that is supported by the architecture,
but was done anyway to increase compatibility with user space software,
which mostly targeted x86 at the time and did not care about aligned
accesses.

This feature is one of the remaining impediments to being able to switch
to 64-bit kernels on 64-bit capable hardware running 32-bit user space,
so let's implement it for the arm64 compat layer as well.

Note that the intent is to implement the exact same handling of
misaligned multi-word loads and stores as the 32-bit kernel does,
including what appears to be missing support for user space programs
that rely on SETEND to switch to a different byte order and back. Also,
like the 32-bit ARM version, we rely on the faulting address reported by
the CPU to infer the memory address, instead of decoding the instruction
fully to obtain this information.

This implementation is taken from the 32-bit ARM tree, with all pieces
removed that deal with instructions other than LDRD/STRD and LDM/STM, or
that deal with alignment exceptions taken in kernel mode.

Cc: debian-arm@lists.debian.org
Cc: Vagrant Cascadian
Cc: Riku Voipio
Cc: Steve McIntyre
Signed-off-by: Ard Biesheuvel
Reviewed-by: Arnd Bergmann
Link: https://lore.kernel.org/r/20220701135322.3025321-1-ardb@kernel.org
[catalin.marinas@arm.com: change the option to 'default n']
Signed-off-by: Catalin Marinas
---
 arch/arm64/Kconfig                   |   3 +
 arch/arm64/include/asm/exception.h   |   1 +
 arch/arm64/kernel/Makefile           |   1 +
 arch/arm64/kernel/compat_alignment.c | 387 +++++++++++++++++++++++++++++++++++
 arch/arm64/mm/fault.c                |   3 +
 5 files changed, 395 insertions(+)
 create mode 100644 arch/arm64/kernel/compat_alignment.c
(limited to 'arch/arm64')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9fb9fff08c94..b17d6861a3ad 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1562,6 +1562,9 @@ config THUMB2_COMPAT_VDSO
 	  Compile the compat vDSO with '-mthumb -fomit-frame-pointer' if y,
 	  otherwise with '-marm'.
 
+config COMPAT_ALIGNMENT_FIXUPS
+	bool "Fix up misaligned multi-word loads and stores in user space"
+
 menuconfig ARMV8_DEPRECATED
 	bool "Emulate deprecated/obsolete ARMv8 instructions"
 	depends on SYSCTL
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index d94aecff9690..e92ca08f754c 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -70,6 +70,7 @@ void do_sysinstr(unsigned long esr, struct pt_regs *regs);
 void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs);
 void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr);
 void do_cp15instr(unsigned long esr, struct pt_regs *regs);
+int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs);
 void do_el0_svc(struct pt_regs *regs);
 void do_el0_svc_compat(struct pt_regs *regs);
 void do_ptrauth_fault(struct pt_regs *regs, unsigned long esr);
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 1add7b01efa7..38a0b0291edb 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -45,6 +45,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE
 obj-$(CONFIG_COMPAT)			+= sys32.o signal32.o		\
 					   sys_compat.o
 obj-$(CONFIG_COMPAT)			+= sigreturn32.o
+obj-$(CONFIG_COMPAT_ALIGNMENT_FIXUPS)	+= compat_alignment.o
 obj-$(CONFIG_KUSER_HELPERS)		+= kuser32.o
 obj-$(CONFIG_FUNCTION_TRACER)		+= ftrace.o entry-ftrace.o
 obj-$(CONFIG_MODULES)			+= module.o
diff --git a/arch/arm64/kernel/compat_alignment.c b/arch/arm64/kernel/compat_alignment.c
new file mode 100644
index 000000000000..5edec2f49ec9
--- /dev/null
+++ b/arch/arm64/kernel/compat_alignment.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// based on arch/arm/mm/alignment.c
+
+#include <linux/compat.h>
+#include <linux/compiler.h>
+#include <linux/mm.h>
+#include <linux/perf_event.h>
+#include <linux/ptrace.h>
+#include <linux/uaccess.h>
+
+#include <asm/exception.h>
+#include <asm/ptrace.h>
+#include <asm/traps.h>
+
+/*
+ * 32-bit misaligned trap handler (c) 1998 San Mehat (CCC) -July 1998
+ *
+ * Speed optimisations and better fault handling by Russell King.
+ */
+#define CODING_BITS(i)	(i & 0x0e000000)
+
+#define LDST_P_BIT(i)	(i & (1 << 24))		/* Preindex */
+#define LDST_U_BIT(i)	(i & (1 << 23))		/* Add offset */
+#define LDST_W_BIT(i)	(i & (1 << 21))		/* Writeback */
+#define LDST_L_BIT(i)	(i & (1 << 20))		/* Load */
+
+#define LDST_P_EQ_U(i)	((((i) ^ ((i) >> 1)) & (1 << 23)) == 0)
+
+#define LDSTHD_I_BIT(i)	(i & (1 << 22))		/* double/half-word immed */
+
+#define RN_BITS(i)	((i >> 16) & 15)	/* Rn */
+#define RD_BITS(i)	((i >> 12) & 15)	/* Rd */
+#define RM_BITS(i)	(i & 15)		/* Rm */
+
+#define REGMASK_BITS(i)	(i & 0xffff)
+
+#define BAD_INSTR	0xdeadc0de
+
+/* Thumb-2 32 bit format per ARMv7 DDI0406A A6.3, either f800h,e800h,f800h */
+#define IS_T32(hi16) \
+	(((hi16) & 0xe000) == 0xe000 && ((hi16) & 0x1800))
+
+union offset_union {
+	unsigned long un;
+	  signed long sn;
+};
+
+#define TYPE_ERROR	0
+#define TYPE_FAULT	1
+#define TYPE_LDST	2
+#define TYPE_DONE	3
+
+static void
+do_alignment_finish_ldst(unsigned long addr, u32 instr, struct pt_regs *regs,
+			 union offset_union offset)
+{
+	if (!LDST_U_BIT(instr))
+		offset.un = -offset.un;
+
+	if (!LDST_P_BIT(instr))
+		addr += offset.un;
+
+	if (!LDST_P_BIT(instr) || LDST_W_BIT(instr))
+		regs->regs[RN_BITS(instr)] = addr;
+}
+
+static int
+do_alignment_ldrdstrd(unsigned long addr, u32 instr, struct pt_regs *regs)
+{
+	unsigned int rd = RD_BITS(instr);
+	unsigned int rd2;
+	int load;
+
+	if ((instr & 0xfe000000) == 0xe8000000) {
+		/* ARMv7 Thumb-2 32-bit LDRD/STRD */
+		rd2 = (instr >> 8) & 0xf;
+		load = !!(LDST_L_BIT(instr));
+	} else if (((rd & 1) == 1) || (rd == 14)) {
+		return TYPE_ERROR;
+	} else {
+		load = ((instr & 0xf0) == 0xd0);
+		rd2 = rd + 1;
+	}
+
+	if (load) {
+		unsigned int val, val2;
+
+		if (get_user(val, (u32 __user *)addr) ||
+		    get_user(val2, (u32 __user *)(addr + 4)))
+			return TYPE_FAULT;
+		regs->regs[rd] = val;
+		regs->regs[rd2] = val2;
+	} else {
+		if (put_user(regs->regs[rd], (u32 __user *)addr) ||
+		    put_user(regs->regs[rd2], (u32 __user *)(addr + 4)))
+			return TYPE_FAULT;
+	}
+	return TYPE_LDST;
+}
+
+/*
+ * LDM/STM alignment handler.
+ *
+ * There are 4 variants of this instruction:
+ *
+ * B = rn pointer before instruction, A = rn pointer after instruction
+ *              ------ increasing address ----->
+ *	        |    | r0 | r1 | ... | rx |    |
+ * PU = 01             B                    A
+ * PU = 11        B                    A
+ * PU = 00        A                    B
+ * PU = 10             A                    B
+ */
+static int
+do_alignment_ldmstm(unsigned long addr, u32 instr, struct pt_regs *regs)
+{
+	unsigned int rd, rn, nr_regs, regbits;
+	unsigned long eaddr, newaddr;
+	unsigned int val;
+
+	/* count the number of registers in the mask to be transferred */
+	nr_regs = hweight16(REGMASK_BITS(instr)) * 4;
+
+	rn = RN_BITS(instr);
+	newaddr = eaddr = regs->regs[rn];
+
+	if (!LDST_U_BIT(instr))
+		nr_regs = -nr_regs;
+	newaddr += nr_regs;
+	if (!LDST_U_BIT(instr))
+		eaddr = newaddr;
+
+	if (LDST_P_EQ_U(instr))	/* U = P */
+		eaddr += 4;
+
+	for (regbits = REGMASK_BITS(instr), rd = 0; regbits;
+	     regbits >>= 1, rd += 1)
+		if (regbits & 1) {
+			if (LDST_L_BIT(instr)) {
+				if (get_user(val, (u32 __user *)eaddr))
+					return TYPE_FAULT;
+				if (rd < 15)
+					regs->regs[rd] = val;
+				else
+					regs->pc = val;
+			} else {
+				/*
+				 * The PC register has a bias of +8 in ARM mode
+				 * and +4 in Thumb mode. This means that a read
+				 * of the value of PC should account for this.
+				 * Since Thumb does not permit STM instructions
+				 * to refer to PC, just add 8 here.
+				 */
+				val = (rd < 15) ?
+				      regs->regs[rd] : regs->pc + 8;
+				if (put_user(val, (u32 __user *)eaddr))
+					return TYPE_FAULT;
+			}
+			eaddr += 4;
+		}
+
+	if (LDST_W_BIT(instr))
+		regs->regs[rn] = newaddr;
+
+	return TYPE_DONE;
+}
+
+/*
+ * Convert Thumb multi-word load/store instruction forms to equivalent ARM
+ * instructions so we can reuse ARM userland alignment fault fixups for Thumb.
+ *
+ * This implementation was initially based on the algorithm found in
+ * gdb/sim/arm/thumbemu.c. It is basically just a code reduction of same
+ * to convert only Thumb ld/st instruction forms to equivalent ARM forms.
+ *
+ * NOTES:
+ * 1. Comments below refer to ARM ARM DDI0100E Thumb Instruction sections.
+ * 2. If for some reason we're passed an non-ld/st Thumb instruction to
+ *    decode, we return 0xdeadc0de. This should never happen under normal
+ *    circumstances but if it does, we've got other problems to deal with
+ *    elsewhere and we obviously can't fix those problems here.
+ */
+
+static unsigned long thumb2arm(u16 tinstr)
+{
+	u32 L = (tinstr & (1<<11)) >> 11;
+
+	switch ((tinstr & 0xf800) >> 11) {
+	/* 6.6.1 Format 1: */
+	case 0xc000 >> 11:				/* 7.1.51 STMIA */
+	case 0xc800 >> 11:				/* 7.1.25 LDMIA */
+		{
+			u32 Rn = (tinstr & (7<<8)) >> 8;
+			u32 W = ((L<<Rn) != (tinstr & (1<<Rn))) ? 1 : 0;
+
+			return 0xe8800000 | W << 21 | L << 20 | Rn << 16 |
+				(tinstr & 255);
+		}
+
+	/* 6.6.1 Format 2: */
+	case 0xb000 >> 11:				/* 7.1.48 PUSH */
+	case 0xb800 >> 11:				/* 7.1.47 POP */
+		if ((tinstr & (3 << 9)) == 0x0400) {
+			static const u32 subset[4] = {
+				0xe92d0000,	/* STMDB sp!,{registers} */
+				0xe92d4000,	/* STMDB sp!,{registers,lr} */
+				0xe8bd0000,	/* LDMIA sp!,{registers} */
+				0xe8bd8000	/* LDMIA sp!,{registers,pc} */
+			};
+			return subset[(L<<1) | ((tinstr & (1<<8)) >> 8)] |
+			    (tinstr & 255);		/* register_list */
+		}
+		fallthrough;	/* for illegal instruction case */
+
+	default:
+		return BAD_INSTR;
+	}
+}
+
+/*
+ * Convert Thumb-2 32 bit LDM, STM, LDRD, STRD to equivalent instruction
+ * handlable by ARM alignment handler, also find the corresponding handler,
+ * so that we can reuse ARM userland alignment fault fixups for Thumb.
+ *
+ * @pinstr: original Thumb-2 instruction; returns new handlable instruction
+ * @regs: register context.
+ * @poffset: return offset from faulted addr for later writeback
+ *
+ * NOTES:
+ * 1. Comments below refer to ARMv7 DDI0406A Thumb Instruction sections.
+ * 2. Register name Rt from ARMv7 is same as Rd from ARMv6 (Rd is Rt)
+ */
+static void *
+do_alignment_t32_to_handler(u32 *pinstr, struct pt_regs *regs,
+			    union offset_union *poffset)
+{
+	u32 instr = *pinstr;
+	u16 tinst1 = (instr >> 16) & 0xffff;
+	u16 tinst2 = instr & 0xffff;
+
+	switch (tinst1 & 0xffe0) {
+	/* A6.3.5 Load/Store multiple */
+	case 0xe880:		/* STM/STMIA/STMEA,LDM/LDMIA, PUSH/POP T2 */
+	case 0xe8a0:		/* ...above writeback version */
+	case 0xe900:		/* STMDB/STMFD, LDMDB/LDMEA */
+	case 0xe920:		/* ...above writeback version */
+		/* no need offset decision since handler calculates it */
+		return do_alignment_ldmstm;
+
+	case 0xf840:		/* POP/PUSH T3 (single register) */
+		if (RN_BITS(instr) == 13 && (tinst2 & 0x09ff) == 0x0904) {
+			u32 L = !!(LDST_L_BIT(instr));
+			const u32 subset[2] = {
+				0xe92d0000,	/* STMDB sp!,{registers} */
+				0xe8bd0000,	/* LDMIA sp!,{registers} */
+			};
+			*pinstr = subset[L] | (1<<RD_BITS(instr));
+			return do_alignment_ldmstm;
+		}
+		break;
+
+	/* A6.3.6 Load/store double, STRD/LDRD(immed, lit, reg) */
+	case 0xe860:
+	case 0xe960:
+	case 0xe8e0:
+	case 0xe9e0:
+		poffset->un = (tinst2 & 0xff) << 2;
+		fallthrough;
+
+	case 0xe940:
+	case 0xe9c0:
+		return do_alignment_ldrdstrd;
+
+	/*
+	 * No need to handle load/store instructions up to word size
+	 * since ARMv6 and later CPUs can perform unaligned accesses.
+	 */
+	default:
+		break;
+	}
+	return NULL;
+}
+
+static int alignment_get_arm(struct pt_regs *regs, __le32 __user *ip, u32 *inst)
+{
+	__le32 instr = 0;
+	int fault;
+
+	fault = get_user(instr, ip);
+	if (fault)
+		return fault;
+
+	*inst = __le32_to_cpu(instr);
+	return 0;
+}
+
+static int alignment_get_thumb(struct pt_regs *regs, __le16 __user *ip, u16 *inst)
+{
+	__le16 instr = 0;
+	int fault;
+
+	fault = get_user(instr, ip);
+	if (fault)
+		return fault;
+
+	*inst = __le16_to_cpu(instr);
+	return 0;
+}
+
+int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs)
+{
+	union offset_union offset;
+	unsigned long instrptr;
+	int (*handler)(unsigned long addr, u32 instr, struct pt_regs *regs);
+	unsigned int type;
+	u32 instr = 0;
+	u16 tinstr = 0;
+	int isize = 4;
+	int thumb2_32b = 0;
+	int fault;
+
+	instrptr = instruction_pointer(regs);
+
+	if (compat_thumb_mode(regs)) {
+		__le16 __user *ptr = (__le16 __user *)(instrptr & ~1);
+
+		fault = alignment_get_thumb(regs, ptr, &tinstr);
+		if (!fault) {
+			if (IS_T32(tinstr)) {
+				/* Thumb-2 32-bit */
+				u16 tinst2;
+				fault = alignment_get_thumb(regs, ptr + 1, &tinst2);
+				instr = ((u32)tinstr << 16) | tinst2;
+				thumb2_32b = 1;
+			} else {
+				isize = 2;
+				instr = thumb2arm(tinstr);
+			}
+		}
+	} else {
+		fault = alignment_get_arm(regs, (__le32 __user *)instrptr, &instr);
+	}
+
+	if (fault)
+		return 1;
+
+	switch (CODING_BITS(instr)) {
+	case 0x00000000:	/* 3.13.4 load/store instruction extensions */
+		if (LDSTHD_I_BIT(instr))
+			offset.un = (instr & 0xf00) >> 4 | (instr & 15);
+		else
+			offset.un = regs->regs[RM_BITS(instr)];
+
+		if ((instr & 0x001000f0) == 0x000000d0 || /* LDRD */
+		    (instr & 0x001000f0) == 0x000000f0)   /* STRD */
+			handler = do_alignment_ldrdstrd;
+		else
+			return 1;
+		break;
+
+	case 0x08000000:	/* ldm or stm, or thumb-2 32bit instruction */
+		if (thumb2_32b) {
+			offset.un = 0;
+			handler = do_alignment_t32_to_handler(&instr, regs, &offset);
+		} else {
+			offset.un = 0;
+			handler = do_alignment_ldmstm;
+		}
+		break;
+
+	default:
+		return 1;
+	}
+
+	type = handler(addr, instr, regs);
+
+	if (type == TYPE_ERROR || type == TYPE_FAULT)
+		return 1;
+
+	if (type == TYPE_LDST)
+		do_alignment_finish_ldst(addr, instr, regs, offset);
+
+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->pc);
+	arm64_skip_faulting_instruction(regs, isize);
+
+	return 0;
+}
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index c33f1fad2745..5b391490e045 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -691,6 +691,9 @@ static int __kprobes do_translation_fault(unsigned long far,
 static int do_alignment_fault(unsigned long far, unsigned long esr,
 			      struct pt_regs *regs)
 {
+	if (IS_ENABLED(CONFIG_COMPAT_ALIGNMENT_FIXUPS) &&
+	    compat_user_mode(regs))
+		return do_compat_alignment_fixup(far, regs);
 	do_bad_area(far, esr, regs);
 	return 0;
 }
-- cgit

From 8eb858c44b98e0326bb32fca34ae671995cd73bb Mon Sep 17 00:00:00 2001
From: Qi Zheng
Date: Mon, 15 Aug 2022 20:47:39 +0800
Subject: arm64: run softirqs on the per-CPU IRQ stack

Currently arm64 supports a per-CPU IRQ stack, but softirqs are still
handled in the task context.

Since any call to local_bh_enable() at any level in the task's call
stack may trigger a softirq processing run, which could potentially
cause a task stack overflow if the combined stack footprints exceed the
stack's size, let's run these softirqs on the IRQ stack as well.
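To illustrate the problem (a purely hypothetical driver path; the names
below are invented for this example and are not part of the patch):

	static DEFINE_SPINLOCK(example_lock);

	/* Assume this runs already deep in the task's call chain. */
	static void example_driver_work(void)
	{
		spin_lock_bh(&example_lock);
		/* ... critical section ... */
		spin_unlock_bh(&example_lock);
		/*
		 * spin_unlock_bh() ends in local_bh_enable(): any pending
		 * softirqs may run right at this point. Previously they ran
		 * on this task's stack, adding their footprint to an already
		 * deep call chain; with this patch they run on the per-CPU
		 * IRQ stack instead.
		 */
	}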
Signed-off-by: Qi Zheng
Reviewed-by: Arnd Bergmann
Acked-by: Will Deacon
Link: https://lore.kernel.org/r/20220815124739.15948-1-zhengqi.arch@bytedance.com
Signed-off-by: Catalin Marinas
---
 arch/arm64/Kconfig      |  1 +
 arch/arm64/kernel/irq.c | 14 ++++++++++++++
 2 files changed, 15 insertions(+)
(limited to 'arch/arm64')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b17d6861a3ad..6f1ecf4538f7 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -230,6 +230,7 @@ config ARM64
 	select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD
 	select TRACE_IRQFLAGS_SUPPORT
 	select TRACE_IRQFLAGS_NMI_SUPPORT
+	select HAVE_SOFTIRQ_ON_OWN_STACK
 	help
 	  ARM 64-bit (AArch64) Linux support.

diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index bda49430c9ea..38dbd3828f13 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -21,7 +21,9 @@
 #include <linux/seq_file.h>
 #include <linux/vmalloc.h>
 #include <asm/daifflags.h>
+#include <asm/exception.h>
 #include <asm/vmap_stack.h>
+#include <asm/softirq_stack.h>
 
 /* Only access this in an NMI enter/exit */
 DEFINE_PER_CPU(struct nmi_ctx, nmi_contexts);
@@ -71,6 +73,18 @@ static void init_irq_stacks(void)
 }
 #endif
 
+#ifndef CONFIG_PREEMPT_RT
+static void ____do_softirq(struct pt_regs *regs)
+{
+	__do_softirq();
+}
+
+void do_softirq_own_stack(void)
+{
+	call_on_irq_stack(NULL, ____do_softirq);
+}
+#endif
+
 static void default_handle_irq(struct pt_regs *regs)
 {
 	panic("IRQ taken without a root IRQ handler\n");
-- cgit

From 877ace9eab7de032f954533afd5d1ecd0cf62eaf Mon Sep 17 00:00:00 2001
From: Liu Song
Date: Fri, 26 Aug 2022 19:40:50 +0800
Subject: arm64: spectre: increase parameters that can be used to turn off bhb
 mitigation individually

In our environment, it was found that the BHB mitigation has a great
impact on benchmark performance. For example, in the lmbench test, the
"process fork && exit" test performance drops by 20%.

So it is necessary to have the ability to turn off the mitigation
individually through the command line, thus avoiding having to compile
the kernel by adjusting the config.

Signed-off-by: Liu Song
Acked-by: Catalin Marinas
Link: https://lore.kernel.org/r/1661514050-22263-1-git-send-email-liusong@linux.alibaba.com
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/proton-pack.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)
(limited to 'arch/arm64')

diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index 40be3a7c2c53..bd1690335bb9 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -988,6 +988,14 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
 	isb();
 }
 
+static bool __read_mostly __nospectre_bhb;
+static int __init parse_spectre_bhb_param(char *str)
+{
+	__nospectre_bhb = true;
+	return 0;
+}
+early_param("nospectre_bhb", parse_spectre_bhb_param);
+
 void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
 {
 	bp_hardening_cb_t cpu_cb;
@@ -1001,7 +1009,7 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
 		/* No point mitigating Spectre-BHB alone. */
 	} else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) {
 		pr_info_once("spectre-bhb mitigation disabled by compile time option\n");
-	} else if (cpu_mitigations_off()) {
+	} else if (cpu_mitigations_off() || __nospectre_bhb) {
 		pr_info_once("spectre-bhb mitigation disabled by command line option\n");
 	} else if (supports_ecbhb(SCOPE_LOCAL_CPU)) {
 		state = SPECTRE_MITIGATED;
-- cgit

From e92072237e6c8497cad5e9d2f2e3c44bb9d26aef Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Sun, 11 Sep 2022 12:44:23 +0800
Subject: arm64: support huge vmalloc mappings

Since commit 559089e0a93d ("vmalloc: replace VM_NO_HUGE_VMAP with
VM_ALLOW_HUGE_VMAP"), the use of hugepage mappings for vmalloc is an
opt-in strategy, so it is safe to support huge vmalloc mappings on
arm64. For now, they are used by kvmalloc() and
alloc_large_system_hash().

Signed-off-by: Kefeng Wang
Link: https://lore.kernel.org/r/20220911044423.139229-1-wangkefeng.wang@huawei.com
Signed-off-by: Catalin Marinas
---
 arch/arm64/Kconfig | 1 +
 1 file changed, 1 insertion(+)
(limited to 'arch/arm64')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 6f1ecf4538f7..7dbcf2076385 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -149,6 +149,7 @@ config ARM64
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_BITREVERSE
 	select HAVE_ARCH_COMPILER_H
+	select HAVE_ARCH_HUGE_VMALLOC
 	select HAVE_ARCH_HUGE_VMAP
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE
-- cgit

From c44094eee32f32f175aadc0efcac449d99b1bbf7 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Tue, 23 Aug 2022 13:21:11 +0100
Subject: arm64: dma: Drop cache invalidation from arch_dma_prep_coherent()

arch_dma_prep_coherent() is called when preparing a non-cacheable region
for a consistent DMA buffer allocation. Since the buffer pages may
previously have been written via a cacheable mapping and consequently
allocated as dirty cachelines, the purpose of this function is to remove
these dirty lines from the cache, writing them back so that the
non-coherent device is able to see them.

On arm64, this operation can be achieved with a clean to the point of
coherency; a subsequent invalidation is not required and serves little
purpose in the presence of a cacheable alias (e.g. the linear map),
since clean lines can be speculatively fetched back into the cache after
the invalidation operation has completed.

Relax the cache maintenance in arch_dma_prep_coherent() so that only a
clean, and not a clean-and-invalidate, operation is performed.
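In other words, the maintenance reduces to the following contrast
(illustrative sketch using the kernel's existing arm64 cache helpers,
with start/size as in the function below):

	/*
	 * Before: clean+invalidate to the Point of Coherency. The
	 * invalidate buys nothing here, since the cacheable linear-map
	 * alias allows clean lines to be speculatively refetched
	 * immediately afterwards.
	 */
	dcache_clean_inval_poc(start, start + size);

	/*
	 * After: a clean to the PoC is sufficient to publish dirty
	 * lines to the non-coherent device.
	 */
	dcache_clean_poc(start, start + size);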
Cc: Mark Rutland
Cc: Robin Murphy
Cc: Christoph Hellwig
Cc: Ard Biesheuvel
Signed-off-by: Will Deacon
Reviewed-by: Catalin Marinas
Link: https://lore.kernel.org/r/20220823122111.17439-1-will@kernel.org
Signed-off-by: Catalin Marinas
---
 arch/arm64/mm/dma-mapping.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'arch/arm64')

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 599cf81f5685..83a512a6ff0d 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -36,7 +36,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
 {
 	unsigned long start = (unsigned long)page_address(page);
 
-	dcache_clean_inval_poc(start, start + size);
+	dcache_clean_poc(start, start + size);
 }
 
 #ifdef CONFIG_IOMMU_DMA
-- cgit

From 739e49e0fc80990a351961c99a3142094822f040 Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Tue, 20 Sep 2022 09:49:51 +0800
Subject: arm64: mm: handle ARM64_KERNEL_USES_PMD_MAPS in vmemmap_populate()

Directly check ARM64_KERNEL_USES_PMD_MAPS to choose base page or PMD
level huge page mapping in vmemmap_populate() to simplify the code a
bit.

Signed-off-by: Kefeng Wang
Link: https://lore.kernel.org/r/20220920014951.196191-1-wangkefeng.wang@huawei.com
Signed-off-by: Catalin Marinas
---
 arch/arm64/mm/mmu.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)
(limited to 'arch/arm64')

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index e7ad44585f40..69deed27dec8 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1180,14 +1180,6 @@ static void free_empty_tables(unsigned long addr, unsigned long end,
 }
 #endif
 
-#if !ARM64_KERNEL_USES_PMD_MAPS
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
-		struct vmem_altmap *altmap)
-{
-	WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
-	return vmemmap_populate_basepages(start, end, node, altmap);
-}
-#else	/* !ARM64_KERNEL_USES_PMD_MAPS */
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 		struct vmem_altmap *altmap)
 {
@@ -1199,6 +1191,10 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 	pmd_t *pmdp;
 
 	WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
+
+	if (!ARM64_KERNEL_USES_PMD_MAPS)
+		return vmemmap_populate_basepages(start, end, node, altmap);
+
 	do {
 		next = pmd_addr_end(addr, end);
 
@@ -1232,7 +1228,6 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 
 	return 0;
 }
-#endif	/* !ARM64_KERNEL_USES_PMD_MAPS */
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 void vmemmap_free(unsigned long start, unsigned long end,
-- cgit

From 973b9e37330656dec719ede508e4dc40e5c2d80c Mon Sep 17 00:00:00 2001
From: Peter Collingbourne
Date: Thu, 15 Sep 2022 15:20:53 -0700
Subject: arm64: mte: move register initialization to C

If FEAT_MTE2 is disabled via the arm64.nomte command line argument on a
CPU that claims to support FEAT_MTE2, the kernel will use Tagged Normal
in the MAIR. If we interpret arm64.nomte to mean that the CPU does not
in fact implement FEAT_MTE2, setting the system register like this may
lead to UNSPECIFIED behavior. Fix it by arranging for MAIR to be set in
the C function cpu_enable_mte, which is called based on the sanitized
version of the system register.

There is no need for the rest of the MTE-related system register
initialization to happen from assembly, with the exception of TCR_EL1,
which must be set to include at least TBI1 because the secondary CPUs
access KASan-allocated data structures early.
Therefore, make the TCR_EL1 initialization unconditional and move the
rest of the initialization to cpu_enable_mte so that we no longer have a
dependency on the unsanitized ID register value.

Co-developed-by: Evgenii Stepanov
Signed-off-by: Peter Collingbourne
Signed-off-by: Evgenii Stepanov
Suggested-by: Catalin Marinas
Reported-by: kernel test robot
Fixes: 3b714d24ef17 ("arm64: mte: CPU feature detection and initial sysreg configuration")
Cc: stable@vger.kernel.org # 5.10.x
Link: https://lore.kernel.org/r/20220915222053.3484231-1-eugenis@google.com
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/mte.h   |  5 +++++
 arch/arm64/kernel/cpufeature.c |  3 ++-
 arch/arm64/kernel/mte.c        | 51 ++++++++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/suspend.c    |  2 ++
 arch/arm64/mm/proc.S           | 46 +++++-------------------------------------
 5 files changed, 65 insertions(+), 42 deletions(-)
(limited to 'arch/arm64')

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index aa523591a44e..760c62f8e22f 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -42,7 +42,9 @@ void mte_sync_tags(pte_t old_pte, pte_t pte);
 void mte_copy_page_tags(void *kto, const void *kfrom);
 void mte_thread_init_user(void);
 void mte_thread_switch(struct task_struct *next);
+void mte_cpu_setup(void);
 void mte_suspend_enter(void);
+void mte_suspend_exit(void);
 long set_mte_ctrl(struct task_struct *task, unsigned long arg);
 long get_mte_ctrl(struct task_struct *task);
 int mte_ptrace_copy_tags(struct task_struct *child, long request,
@@ -72,6 +74,9 @@ static inline void mte_thread_switch(struct task_struct *next)
 static inline void mte_suspend_enter(void)
 {
 }
+static inline void mte_suspend_exit(void)
+{
+}
 static inline long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 {
 	return 0;
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index af4de817d712..d7a077b5ccd1 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2034,7 +2034,8 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused)
 static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
 {
 	sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0);
-	isb();
+
+	mte_cpu_setup();
 
 	/*
 	 * Clear the tags in the zero page. This needs to be done via the
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index b2b730233274..aca88470fb69 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -285,6 +285,49 @@ void mte_thread_switch(struct task_struct *next)
 	mte_check_tfsr_el1();
 }
 
+void mte_cpu_setup(void)
+{
+	u64 rgsr;
+
+	/*
+	 * CnP must be enabled only after the MAIR_EL1 register has been set
+	 * up. Inconsistent MAIR_EL1 between CPUs sharing the same TLB may
+	 * lead to the wrong memory type being used for a brief window during
+	 * CPU power-up.
+	 *
+	 * CnP is not a boot feature so MTE gets enabled before CnP, but let's
+	 * make sure that is the case.
+	 */
+	BUG_ON(read_sysreg(ttbr0_el1) & TTBR_CNP_BIT);
+	BUG_ON(read_sysreg(ttbr1_el1) & TTBR_CNP_BIT);
+
+	/* Normal Tagged memory type at the corresponding MAIR index */
+	sysreg_clear_set(mair_el1,
+			 MAIR_ATTRIDX(MAIR_ATTR_MASK, MT_NORMAL_TAGGED),
+			 MAIR_ATTRIDX(MAIR_ATTR_NORMAL_TAGGED,
+				      MT_NORMAL_TAGGED));
+
+	write_sysreg_s(KERNEL_GCR_EL1, SYS_GCR_EL1);
+
+	/*
+	 * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then
+	 * RGSR_EL1.SEED must be non-zero for IRG to produce
+	 * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we
+	 * must initialize it.
+	 */
+	rgsr = (read_sysreg(CNTVCT_EL0) & SYS_RGSR_EL1_SEED_MASK) <<
+	       SYS_RGSR_EL1_SEED_SHIFT;
+	if (rgsr == 0)
+		rgsr = 1 << SYS_RGSR_EL1_SEED_SHIFT;
+	write_sysreg_s(rgsr, SYS_RGSR_EL1);
+
+	/* clear any pending tag check faults in TFSR*_EL1 */
+	write_sysreg_s(0, SYS_TFSR_EL1);
+	write_sysreg_s(0, SYS_TFSRE0_EL1);
+
+	local_flush_tlb_all();
+}
+
 void mte_suspend_enter(void)
 {
 	if (!system_supports_mte())
@@ -301,6 +344,14 @@ void mte_suspend_enter(void)
 	mte_check_tfsr_el1();
 }
 
+void mte_suspend_exit(void)
+{
+	if (!system_supports_mte())
+		return;
+
+	mte_cpu_setup();
+}
+
 long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 {
 	u64 mte_ctrl = (~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 9135fe0f3df5..8b02d310838f 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -43,6 +43,8 @@ void notrace __cpu_suspend_exit(void)
 {
 	unsigned int cpu = smp_processor_id();
 
+	mte_suspend_exit();
+
 	/*
 	 * We are resuming from reset with the idmap active in TTBR0_EL1.
 	 * We must uninstall the idmap and restore the expected MMU
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 7837a69524c5..f38bccdd374a 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -48,17 +48,19 @@
 
 #ifdef CONFIG_KASAN_HW_TAGS
 #define TCR_MTE_FLAGS	TCR_TCMA1 | TCR_TBI1 | TCR_TBID1
-#else
+#elif defined(CONFIG_ARM64_MTE)
 /*
  * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on
  * TBI being enabled at EL1.
  */
 #define TCR_MTE_FLAGS	TCR_TBI1 | TCR_TBID1
+#else
+#define TCR_MTE_FLAGS	0
 #endif
 
 /*
  * Default MAIR_EL1. MT_NORMAL_TAGGED is initially mapped as Normal memory and
- * changed during __cpu_setup to Normal Tagged if the system supports MTE.
+ * changed during mte_cpu_setup to Normal Tagged if the system supports MTE.
  */
 #define MAIR_EL1_SET							\
	(MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) |	\
@@ -426,46 +428,8 @@ SYM_FUNC_START(__cpu_setup)
 	mov_q	mair, MAIR_EL1_SET
 	mov_q	tcr, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
 			TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
-			TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS
-
-#ifdef CONFIG_ARM64_MTE
-	/*
-	 * Update MAIR_EL1, GCR_EL1 and TFSR*_EL1 if MTE is supported
-	 * (ID_AA64PFR1_EL1[11:8] > 1).
-	 */
-	mrs	x10, ID_AA64PFR1_EL1
-	ubfx	x10, x10, #ID_AA64PFR1_MTE_SHIFT, #4
-	cmp	x10, #ID_AA64PFR1_MTE
-	b.lt	1f
-
-	/* Normal Tagged memory type at the corresponding MAIR index */
-	mov	x10, #MAIR_ATTR_NORMAL_TAGGED
-	bfi	mair, x10, #(8 * MT_NORMAL_TAGGED), #8
+			TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS
 
-	mov	x10, #KERNEL_GCR_EL1
-	msr_s	SYS_GCR_EL1, x10
-
-	/*
-	 * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then
-	 * RGSR_EL1.SEED must be non-zero for IRG to produce
-	 * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we
-	 * must initialize it.
-	 */
-	mrs	x10, CNTVCT_EL0
-	ands	x10, x10, #SYS_RGSR_EL1_SEED_MASK
-	csinc	x10, x10, xzr, ne
-	lsl	x10, x10, #SYS_RGSR_EL1_SEED_SHIFT
-	msr_s	SYS_RGSR_EL1, x10
-
-	/* clear any pending tag check faults in TFSR*_EL1 */
-	msr_s	SYS_TFSR_EL1, xzr
-	msr_s	SYS_TFSRE0_EL1, xzr
-
-	/* set the TCR_EL1 bits */
-	mov_q	x10, TCR_MTE_FLAGS
-	orr	tcr, tcr, x10
-1:
-#endif
 	tcr_clear_errata_bits tcr, x9, x5
 
 #ifdef CONFIG_ARM64_VA_BITS_52
-- cgit

From 2305b809be930ce02e095ee7207e8b1be4a5cd75 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Thu, 22 Sep 2022 16:10:53 +0100
Subject: arm64: uaccess: simplify uaccess_mask_ptr()

We introduced uaccess pointer masking for arm64 in commit:

  4d8efc2d5ee4c9cc ("arm64: Use pointer masking to limit uaccess speculation")

which was intended to prevent speculative uaccesses to kernel memory on
CPUs where access permissions were not respected under speculation.

At the time, the uaccess primitives were occasionally used to access
kernel memory, with the maximum permitted address held in
thread_info::addr_limit. Consequently, the address masking needed to
take this dynamic limit into account.

Subsequently the uaccess primitives were reworked such that they are
only used for user memory, and as of commit:

  3d2403fd10a1dbb3 ("arm64: uaccess: remove set_fs()")

... the address limit was made a compile-time constant, but the logic
was otherwise unchanged.

Regardless of the configured VA size or whether TBI is in use, the
address space can be divided into three ranges:

* The TTBR0 VA range, for which any valid pointer has bit 55 *clear*,
  and any non-tag bits [63-56] must match bit 55 (i.e. must be clear).

* The TTBR1 VA range, for which any valid pointer has bit 55 *set*, and
  any non-tag bits [63-56] must match bit 55 (i.e. must be set).

* The gap between the TTBR0 and TTBR1 ranges, where bit 55 may be set
  or clear, but any access will result in a fault.

As the uaccess primitives are now only used for user memory in the
TTBR0 VA range, we can prevent generation of TTBR1 addresses by
clearing bit 55, which will either result in a TTBR0 address or a
faulting address between the TTBR VA ranges.

This is beneficial for code generation as:

* We no longer clobber the condition codes.

* We no longer burn a register on (TASK_SIZE_MAX - 1).

* We no longer need to consume the untagged pointer.

When building a defconfig v6.0-rc3 with GCC 12.1.0, this change makes
the resulting Image 64KiB smaller.

Signed-off-by: Mark Rutland
Cc: James Morse
Cc: Robin Murphy
Cc: Will Deacon
Reviewed-by: Robin Murphy
Link: https://lore.kernel.org/r/20220922151053.3520750-1-mark.rutland@arm.com
[catalin.marinas@arm.com: remove csdb() as the bit clearing is unconditional]
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/uaccess.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)
(limited to 'arch/arm64')

diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 2fc9f0861769..5c7b2f9d5913 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -203,9 +203,11 @@ static inline void uaccess_enable_privileged(void)
 }
 
 /*
- * Sanitise a uaccess pointer such that it becomes NULL if above the maximum
- * user address. In case the pointer is tagged (has the top byte set), untag
- * the pointer before checking.
+ * Sanitize a uaccess pointer such that it cannot reach any kernel address.
+ *
+ * Clearing bit 55 ensures the pointer cannot address any portion of the TTBR1
+ * address range (i.e. any kernel address), and either the pointer falls within
+ * the TTBR0 address range or must cause a fault.
 */
 #define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr)
 static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
@@ -213,14 +215,12 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
 	void __user *safe_ptr;
 
 	asm volatile(
-	"	bics	xzr, %3, %2\n"
-	"	csel	%0, %1, xzr, eq\n"
-	: "=&r" (safe_ptr)
-	: "r" (ptr), "r" (TASK_SIZE_MAX - 1),
-	  "r" (untagged_addr(ptr))
-	: "cc");
-
-	csdb();
+	"	bic	%0, %1, %2\n"
+	: "=r" (safe_ptr)
+	: "r" (ptr),
+	  "i" (BIT(55))
+	);
+
 	return safe_ptr;
 }
-- cgit

From b9dd04a20f81333e4b99662f1bbaf7c9e3a1e137 Mon Sep 17 00:00:00 2001
From: Mike Rapoport
Date: Wed, 21 Sep 2022 10:48:41 +0300
Subject: arm64/mm: fold check for KFENCE into can_set_direct_map()

KFENCE requires the linear map to be mapped at page granularity, so that
it is possible to protect/unprotect single pages, just like with
rodata_full and DEBUG_PAGEALLOC.

Instead of repeating "can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE)",
make can_set_direct_map() handle the KFENCE case.

This also prevents potential false positives in kernel_page_present()
that may return true for a non-present page if CONFIG_KFENCE is enabled.

Signed-off-by: Mike Rapoport
Reviewed-by: Anshuman Khandual
Link: https://lore.kernel.org/r/20220921074841.382615-1-rppt@kernel.org
Signed-off-by: Catalin Marinas
---
 arch/arm64/mm/mmu.c      | 8 ++------
 arch/arm64/mm/pageattr.c | 8 +++++++-
 2 files changed, 9 insertions(+), 7 deletions(-)
(limited to 'arch/arm64')

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 69deed27dec8..e8a48dfd550f 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -535,7 +535,7 @@ static void __init map_mem(pgd_t *pgdp)
 	 */
 	BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
 
-	if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE))
+	if (can_set_direct_map())
 		flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
 	/*
@@ -1542,11 +1542,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
 
 	VM_BUG_ON(!mhp_range_allowed(start, size, true));
 
-	/*
-	 * KFENCE requires linear map to be mapped at page granularity, so that
-	 * it is possible to protect/unprotect single pages in the KFENCE pool.
-	 */
-	if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE))
+	if (can_set_direct_map())
 		flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
 	__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 64e985eaa52d..d107c3d434e2 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -21,7 +21,13 @@ bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED
 
 bool can_set_direct_map(void)
 {
-	return rodata_full || debug_pagealloc_enabled();
+	/*
+	 * rodata_full, DEBUG_PAGEALLOC and KFENCE require linear map to be
+	 * mapped at page granularity, so that it is possible to
+	 * protect/unprotect single pages.
+	 */
+	return rodata_full || debug_pagealloc_enabled() ||
+		IS_ENABLED(CONFIG_KFENCE);
 }
 
 static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
-- cgit

From 8c6e3657be6b39cd5943041d0a4ab6bd5d0c2258 Mon Sep 17 00:00:00 2001
From: Xiu Jianfeng
Date: Sun, 11 Sep 2022 11:47:47 +0800
Subject: ARM64: reloc_test: add __init/__exit annotations to module init/exit
 funcs

Add missing __init/__exit annotations to module init/exit funcs.
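For background, a minimal sketch of the convention (a generic example,
not this module's code): functions referenced only from module_init()
and module_exit() can be placed in the init/exit sections, letting the
kernel free init text after boot and drop exit text entirely when the
code is built in:

	static int __init demo_init(void)	/* .init.text: freed after boot */
	{
		return 0;
	}

	static void __exit demo_exit(void)	/* discarded when built-in */
	{
	}

	module_init(demo_init);
	module_exit(demo_exit);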
Signed-off-by: Xiu Jianfeng
Link: https://lore.kernel.org/r/20220911034747.132098-1-xiujianfeng@huawei.com
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/reloc_test_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'arch/arm64')

diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c
index e87a2b7f20f6..99f2ffe9fc05 100644
--- a/arch/arm64/kernel/reloc_test_core.c
+++ b/arch/arm64/kernel/reloc_test_core.c
@@ -48,7 +48,7 @@ static struct {
 	{ "R_AARCH64_PREL16",		relative_data16, (u64)&sym64_rel },
 };
 
-static int reloc_test_init(void)
+static int __init reloc_test_init(void)
 {
 	int i;
 
@@ -67,7 +67,7 @@ static int reloc_test_init(void)
 	return 0;
 }
 
-static void reloc_test_exit(void)
+static void __exit reloc_test_exit(void)
 {
 }
-- cgit

From a0caebbd04602cb7d28f6f316213a915ffab92a2 Mon Sep 17 00:00:00 2001
From: Liao Chang
Date: Tue, 27 Sep 2022 10:24:35 +0800
Subject: arm64/kprobe: Optimize the performance of patching single-step slot

The single-step slot is not used until the kprobe is enabled, which
means no race condition can occur on it under SMP; hence it is safe to
patch the ss slot without stopping the machine.

Since the I and D caches are already made coherent for the single-step
slot by aarch64_insn_patch_text_nosync(), there is no need to do it
again via flush_icache_range().

Acked-by: Will Deacon
Acked-by: Masami Hiramatsu (Google)
Signed-off-by: Liao Chang
Link: https://lore.kernel.org/r/20220927022435.129965-4-liaochang1@huawei.com
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/probes/kprobes.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)
(limited to 'arch/arm64')

diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index d1d182320245..c9e4d0720285 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -44,13 +44,28 @@ post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *);
 static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
 {
 	kprobe_opcode_t *addr = p->ainsn.api.insn;
-	void *addrs[] = {addr, addr + 1};
-	u32 insns[] = {p->opcode, BRK64_OPCODE_KPROBES_SS};
 
-	/* prepare insn slot */
-	aarch64_insn_patch_text(addrs, insns, 2);
-
-	flush_icache_range((uintptr_t)addr, (uintptr_t)(addr + MAX_INSN_SIZE));
+	/*
+	 * Prepare the insn slot. As Mark Rutland points out, this depends on
+	 * a couple of subtleties:
+	 *
+	 * - That the I-cache maintenance for these instructions is complete
+	 *   *before* the kprobe BRK is written (and aarch64_insn_patch_text_nosync()
+	 *   ensures this, but just omits causing a Context-Synchronization-Event
+	 *   on all CPUs).
+	 *
+	 * - That the kprobe BRK results in an exception (and consequently a
+	 *   Context-Synchronization-Event), which ensures that the CPU will
+	 *   fetch the single-step slot instructions *after* this, ensuring that
+	 *   the new instructions are used.
+	 *
+	 * Normally an ISB would be placed after patching to guarantee that the
+	 * I-cache maintenance is observed on all CPUs; however, the single-step
+	 * slot is installed in the BRK exception handler, so it is unnecessary
+	 * to generate a Context-Synchronization-Event via ISB again.
+	 */
+	aarch64_insn_patch_text_nosync(addr, p->opcode);
+	aarch64_insn_patch_text_nosync(addr + 1, BRK64_OPCODE_KPROBES_SS);
 
 	/*
 	 * Needs restoring of return address after stepping xol.
-- cgit
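For completeness, nothing in the patch above changes how kprobes are
used; a standard registration (a sketch, with an arbitrary example
symbol) still prepares the single-step slot at register time:

	static struct kprobe kp = {
		.symbol_name = "kernel_clone",	/* hypothetical probe target */
	};

	/* arch_prepare_kprobe() -> arch_prepare_ss_slot() runs in here */
	int ret = register_kprobe(&kp);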