Diffstat (limited to 'arch/arm64/kernel')
32 files changed, 480 insertions, 319 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index bbaf0bc4ad60..86364ab6f13f 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -58,7 +58,6 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_CRASH_CORE) += crash_core.o obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o -obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o obj-$(CONFIG_ARM64_MTE) += mte.o obj-y += vdso/ probes/ diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 73039949b5ce..a57cffb752e8 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -21,7 +21,8 @@ #define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) #define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) -static int all_alternatives_applied; +/* Volatile, as we may be patching the guts of READ_ONCE() */ +static volatile int all_alternatives_applied; static DECLARE_BITMAP(applied_alternatives, ARM64_NCAPS); @@ -205,7 +206,7 @@ static int __apply_alternatives_multi_stop(void *unused) /* We always have a CPU 0 at this point (__init) */ if (smp_processor_id()) { - while (!READ_ONCE(all_alternatives_applied)) + while (!all_alternatives_applied) cpu_relax(); isb(); } else { @@ -217,7 +218,7 @@ static int __apply_alternatives_multi_stop(void *unused) BUG_ON(all_alternatives_applied); __apply_alternatives(®ion, false, remaining_capabilities); /* Barriers provided by the cache flushing */ - WRITE_ONCE(all_alternatives_applied, 1); + all_alternatives_applied = 1; } return 0; diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 7364de008bab..0e86e8b9cedd 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -277,7 +277,7 @@ static void __init register_insn_emulation_sysctl(void) #define __user_swpX_asm(data, addr, res, temp, temp2, B) \ do { \ - uaccess_enable(); \ + uaccess_enable_privileged(); \ __asm__ __volatile__( \ " mov %w3, %w7\n" \ "0: ldxr"B" %w2, [%4]\n" \ @@ -302,7 +302,7 @@ do { \ "i" (-EFAULT), \ "i" (__SWP_LL_SC_LOOPS) \ : "memory"); \ - uaccess_disable(); \ + uaccess_disable_privileged(); \ } while (0) #define __user_swp_asm(data, addr, res, temp, temp2) \ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 7d32fc959b1a..679b19b8a7ff 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -30,7 +30,6 @@ int main(void) BLANK(); DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); - DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit)); #ifdef CONFIG_ARM64_SW_TTBR0_PAN DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); #endif @@ -70,7 +69,7 @@ int main(void) DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate)); DEFINE(S_PC, offsetof(struct pt_regs, pc)); DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); - DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit)); + DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1)); DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save)); DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6f36c4f62f69..39138f6d3ba2 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -153,10 +153,6 @@ EXPORT_SYMBOL(cpu_hwcap_keys); 
.width = 0, \ } -/* meta feature for alternatives */ -static bool __maybe_unused -cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused); - static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap); static bool __system_matches_cap(unsigned int n); @@ -1528,8 +1524,10 @@ bool cpu_has_amu_feat(int cpu) return cpumask_test_cpu(cpu, &amu_cpus); } -/* Initialize the use of AMU counters for frequency invariance */ -extern void init_cpu_freq_invariance_counters(void); +int get_cpu_with_amu_feat(void) +{ + return cpumask_any(&amu_cpus); +} static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap) { @@ -1537,7 +1535,7 @@ static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap) pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n", smp_processor_id()); cpumask_set_cpu(smp_processor_id(), &amu_cpus); - init_cpu_freq_invariance_counters(); + update_freq_counters_refs(); } } @@ -1559,6 +1557,11 @@ static bool has_amu(const struct arm64_cpu_capabilities *cap, return true; } +#else +int get_cpu_with_amu_feat(void) +{ + return nr_cpu_ids; +} #endif #ifdef CONFIG_ARM64_VHE @@ -1600,7 +1603,7 @@ static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) WARN_ON_ONCE(in_interrupt()); sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); - asm(SET_PSTATE_PAN(1)); + set_pstate_pan(1); } #endif /* CONFIG_ARM64_PAN */ @@ -1770,28 +1773,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, .matches = has_no_hw_prefetch, }, -#ifdef CONFIG_ARM64_UAO - { - .desc = "User Access Override", - .capability = ARM64_HAS_UAO, - .type = ARM64_CPUCAP_SYSTEM_FEATURE, - .matches = has_cpuid_feature, - .sys_reg = SYS_ID_AA64MMFR2_EL1, - .field_pos = ID_AA64MMFR2_UAO_SHIFT, - .min_field_value = 1, - /* - * We rely on stop_machine() calling uao_thread_switch() to set - * UAO immediately after patching. - */ - }, -#endif /* CONFIG_ARM64_UAO */ -#ifdef CONFIG_ARM64_PAN - { - .capability = ARM64_ALT_PAN_NOT_UAO, - .type = ARM64_CPUCAP_SYSTEM_FEATURE, - .matches = cpufeature_pan_not_uao, - }, -#endif /* CONFIG_ARM64_PAN */ #ifdef CONFIG_ARM64_VHE { .desc = "Virtualization Host Extensions", @@ -2138,6 +2119,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_mte, }, #endif /* CONFIG_ARM64_MTE */ + { + .desc = "RCpc load-acquire (LDAPR)", + .capability = ARM64_HAS_LDAPR, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .sys_reg = SYS_ID_AA64ISAR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64ISAR1_LRCPC_SHIFT, + .matches = has_cpuid_feature, + .min_field_value = 1, + }, {}, }; @@ -2652,7 +2643,7 @@ bool this_cpu_has_cap(unsigned int n) * - The SYSTEM_FEATURE cpu_hwcaps may not have been set. * In all other cases cpus_have_{const_}cap() should be used. 
*/ -static bool __system_matches_cap(unsigned int n) +static bool __maybe_unused __system_matches_cap(unsigned int n) { if (n < ARM64_NCAPS) { const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[n]; @@ -2732,12 +2723,6 @@ void __init setup_cpu_features(void) ARCH_DMA_MINALIGN); } -static bool __maybe_unused -cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) -{ - return (__system_matches_cap(ARM64_HAS_PAN) && !__system_matches_cap(ARM64_HAS_UAO)); -} - static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap) { cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index fa76151de6ff..4f3661eeb7ec 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -234,9 +234,8 @@ static void send_user_sigtrap(int si_code) if (interrupts_enabled(regs)) local_irq_enable(); - arm64_force_sig_fault(SIGTRAP, si_code, - (void __user *)instruction_pointer(regs), - "User debug trap"); + arm64_force_sig_fault(SIGTRAP, si_code, instruction_pointer(regs), + "User debug trap"); } static int single_step_handler(unsigned long unused, unsigned int esr, diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S index a71844fb923e..28d8a5dca5f1 100644 --- a/arch/arm64/kernel/efi-header.S +++ b/arch/arm64/kernel/efi-header.S @@ -7,30 +7,48 @@ #include <linux/pe.h> #include <linux/sizes.h> + .macro efi_signature_nop +#ifdef CONFIG_EFI +.L_head: + /* + * This ccmp instruction has no meaningful effect except that + * its opcode forms the magic "MZ" signature required by UEFI. + */ + ccmp x18, #0, #0xd, pl +#else + /* + * Bootloaders may inspect the opcode at the start of the kernel + * image to decide if the kernel is capable of booting via UEFI. + * So put an ordinary NOP here, not the "MZ.." pseudo-nop above. + */ + nop +#endif + .endm + .macro __EFI_PE_HEADER +#ifdef CONFIG_EFI + .set .Lpe_header_offset, . 
- .L_head .long PE_MAGIC -coff_header: .short IMAGE_FILE_MACHINE_ARM64 // Machine - .short section_count // NumberOfSections + .short .Lsection_count // NumberOfSections .long 0 // TimeDateStamp .long 0 // PointerToSymbolTable .long 0 // NumberOfSymbols - .short section_table - optional_header // SizeOfOptionalHeader + .short .Lsection_table - .Loptional_header // SizeOfOptionalHeader .short IMAGE_FILE_DEBUG_STRIPPED | \ IMAGE_FILE_EXECUTABLE_IMAGE | \ IMAGE_FILE_LINE_NUMS_STRIPPED // Characteristics -optional_header: +.Loptional_header: .short PE_OPT_MAGIC_PE32PLUS // PE32+ format .byte 0x02 // MajorLinkerVersion .byte 0x14 // MinorLinkerVersion - .long __initdata_begin - efi_header_end // SizeOfCode + .long __initdata_begin - .Lefi_header_end // SizeOfCode .long __pecoff_data_size // SizeOfInitializedData .long 0 // SizeOfUninitializedData - .long __efistub_efi_pe_entry - _head // AddressOfEntryPoint - .long efi_header_end - _head // BaseOfCode + .long __efistub_efi_pe_entry - .L_head // AddressOfEntryPoint + .long .Lefi_header_end - .L_head // BaseOfCode -extra_header_fields: .quad 0 // ImageBase .long SEGMENT_ALIGN // SectionAlignment .long PECOFF_FILE_ALIGNMENT // FileAlignment @@ -42,10 +60,10 @@ extra_header_fields: .short 0 // MinorSubsystemVersion .long 0 // Win32VersionValue - .long _end - _head // SizeOfImage + .long _end - .L_head // SizeOfImage // Everything before the kernel image is considered part of the header - .long efi_header_end - _head // SizeOfHeaders + .long .Lefi_header_end - .L_head // SizeOfHeaders .long 0 // CheckSum .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem .short 0 // DllCharacteristics @@ -54,7 +72,7 @@ extra_header_fields: .quad 0 // SizeOfHeapReserve .quad 0 // SizeOfHeapCommit .long 0 // LoaderFlags - .long (section_table - .) / 8 // NumberOfRvaAndSizes + .long (.Lsection_table - .) / 8 // NumberOfRvaAndSizes .quad 0 // ExportTable .quad 0 // ImportTable @@ -64,17 +82,17 @@ extra_header_fields: .quad 0 // BaseRelocationTable #ifdef CONFIG_DEBUG_EFI - .long efi_debug_table - _head // DebugTable - .long efi_debug_table_size + .long .Lefi_debug_table - .L_head // DebugTable + .long .Lefi_debug_table_size #endif // Section table -section_table: +.Lsection_table: .ascii ".text\0\0\0" - .long __initdata_begin - efi_header_end // VirtualSize - .long efi_header_end - _head // VirtualAddress - .long __initdata_begin - efi_header_end // SizeOfRawData - .long efi_header_end - _head // PointerToRawData + .long __initdata_begin - .Lefi_header_end // VirtualSize + .long .Lefi_header_end - .L_head // VirtualAddress + .long __initdata_begin - .Lefi_header_end // SizeOfRawData + .long .Lefi_header_end - .L_head // PointerToRawData .long 0 // PointerToRelocations .long 0 // PointerToLineNumbers @@ -86,9 +104,9 @@ section_table: .ascii ".data\0\0\0" .long __pecoff_data_size // VirtualSize - .long __initdata_begin - _head // VirtualAddress + .long __initdata_begin - .L_head // VirtualAddress .long __pecoff_data_rawsize // SizeOfRawData - .long __initdata_begin - _head // PointerToRawData + .long __initdata_begin - .L_head // PointerToRawData .long 0 // PointerToRelocations .long 0 // PointerToLineNumbers @@ -98,7 +116,7 @@ section_table: IMAGE_SCN_MEM_READ | \ IMAGE_SCN_MEM_WRITE // Characteristics - .set section_count, (. - section_table) / 40 + .set .Lsection_count, (. 
- .Lsection_table) / 40 #ifdef CONFIG_DEBUG_EFI /* @@ -114,21 +132,21 @@ section_table: __INITRODATA .align 2 -efi_debug_table: +.Lefi_debug_table: // EFI_IMAGE_DEBUG_DIRECTORY_ENTRY .long 0 // Characteristics .long 0 // TimeDateStamp .short 0 // MajorVersion .short 0 // MinorVersion .long IMAGE_DEBUG_TYPE_CODEVIEW // Type - .long efi_debug_entry_size // SizeOfData + .long .Lefi_debug_entry_size // SizeOfData .long 0 // RVA - .long efi_debug_entry - _head // FileOffset + .long .Lefi_debug_entry - .L_head // FileOffset - .set efi_debug_table_size, . - efi_debug_table + .set .Lefi_debug_table_size, . - .Lefi_debug_table .previous -efi_debug_entry: +.Lefi_debug_entry: // EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY .ascii "NB10" // Signature .long 0 // Unknown @@ -137,16 +155,12 @@ efi_debug_entry: .asciz VMLINUX_PATH - .set efi_debug_entry_size, . - efi_debug_entry + .set .Lefi_debug_entry_size, . - .Lefi_debug_entry #endif - /* - * EFI will load .text onwards at the 4k section alignment - * described in the PE/COFF header. To ensure that instruction - * sequences using an adrp and a :lo12: immediate will function - * correctly at this alignment, we must ensure that .text is - * placed at a 4k boundary in the Image to begin with. - */ .balign SEGMENT_ALIGN -efi_header_end: +.Lefi_header_end: +#else + .set .Lpe_header_offset, 0x0 +#endif .endm diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 70e0a7591245..5346953e4382 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -115,7 +115,6 @@ static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr) enter_from_kernel_mode(regs); local_daif_inherit(regs); - far = untagged_addr(far); do_mem_abort(far, esr, regs); local_daif_mask(); exit_to_kernel_mode(regs); @@ -256,7 +255,6 @@ static void noinstr el0_da(struct pt_regs *regs, unsigned long esr) enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); - far = untagged_addr(far); do_mem_abort(far, esr, regs); } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index d72c818b019c..51c762156099 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -216,12 +216,6 @@ alternative_else_nop_endif .else add x21, sp, #S_FRAME_SIZE get_current_task tsk - /* Save the task's original addr_limit and set USER_DS */ - ldr x20, [tsk, #TSK_TI_ADDR_LIMIT] - str x20, [sp, #S_ORIG_ADDR_LIMIT] - mov x20, #USER_DS - str x20, [tsk, #TSK_TI_ADDR_LIMIT] - /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */ .endif /* \el == 0 */ mrs x22, elr_el1 mrs x23, spsr_el1 @@ -279,12 +273,6 @@ alternative_else_nop_endif .macro kernel_exit, el .if \el != 0 disable_daif - - /* Restore the task's original addr_limit. 
*/ - ldr x20, [sp, #S_ORIG_ADDR_LIMIT] - str x20, [tsk, #TSK_TI_ADDR_LIMIT] - - /* No need to restore UAO, it will be restored from SPSR_EL1 */ .endif /* Restore pmr */ @@ -438,7 +426,7 @@ SYM_CODE_END(__swpan_exit_el0) #ifdef CONFIG_SHADOW_CALL_STACK /* also switch to the irq shadow stack */ - adr_this_cpu scs_sp, irq_shadow_call_stack, x26 + ldr_this_cpu scs_sp, irq_shadow_call_stack_ptr, x26 #endif 9998: @@ -773,9 +761,10 @@ SYM_CODE_END(ret_to_user) */ .pushsection ".entry.tramp.text", "ax" + // Move from tramp_pg_dir to swapper_pg_dir .macro tramp_map_kernel, tmp mrs \tmp, ttbr1_el1 - add \tmp, \tmp, #(PAGE_SIZE + RESERVED_TTBR0_SIZE) + add \tmp, \tmp, #(2 * PAGE_SIZE) bic \tmp, \tmp, #USER_ASID_FLAG msr ttbr1_el1, \tmp #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 @@ -792,9 +781,10 @@ alternative_else_nop_endif #endif /* CONFIG_QCOM_FALKOR_ERRATUM_1003 */ .endm + // Move from swapper_pg_dir to tramp_pg_dir .macro tramp_unmap_kernel, tmp mrs \tmp, ttbr1_el1 - sub \tmp, \tmp, #(PAGE_SIZE + RESERVED_TTBR0_SIZE) + sub \tmp, \tmp, #(2 * PAGE_SIZE) orr \tmp, \tmp, #USER_ASID_FLAG msr ttbr1_el1, \tmp /* @@ -965,10 +955,9 @@ SYM_CODE_START(__sdei_asm_entry_trampoline) mov x4, xzr /* - * Use reg->interrupted_regs.addr_limit to remember whether to unmap - * the kernel on exit. + * Remember whether to unmap the kernel on exit. */ -1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)] +1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)] #ifdef CONFIG_RANDOMIZE_BASE adr x4, tramp_vectors + PAGE_SIZE @@ -989,7 +978,7 @@ NOKPROBE(__sdei_asm_entry_trampoline) * x4: struct sdei_registered_event argument from registration time. */ SYM_CODE_START(__sdei_asm_exit_trampoline) - ldr x4, [x4, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)] + ldr x4, [x4, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)] cbnz x4, 1f tramp_unmap_kernel tmp=x4 @@ -1063,9 +1052,9 @@ SYM_CODE_START(__sdei_asm_handler) #ifdef CONFIG_SHADOW_CALL_STACK /* Use a separate shadow call stack for normal and critical events */ cbnz w4, 3f - adr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_normal, tmp=x6 + ldr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_normal_ptr, tmp=x6 b 4f -3: adr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_critical, tmp=x6 +3: ldr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_critical_ptr, tmp=x6 4: #endif diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index d8d9caf02834..f2eb206920a2 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -58,21 +58,11 @@ * in the entry routines. */ __HEAD -_head: /* * DO NOT MODIFY. Image header expected by Linux boot-loaders. */ -#ifdef CONFIG_EFI - /* - * This add instruction has no meaningful effect except that - * its opcode forms the magic "MZ" signature required by UEFI. - */ - add x13, x18, #0x16 - b primary_entry -#else + efi_signature_nop // special NOP to identity as PE/COFF executable b primary_entry // branch to kernel start, magic - .long 0 // reserved -#endif .quad 0 // Image load offset from start of RAM, little-endian le64sym _kernel_size_le // Effective size of kernel image, little-endian le64sym _kernel_flags_le // Informative flags, little-endian @@ -80,14 +70,9 @@ _head: .quad 0 // reserved .quad 0 // reserved .ascii ARM64_IMAGE_MAGIC // Magic number -#ifdef CONFIG_EFI - .long pe_header - _head // Offset to the PE header. + .long .Lpe_header_offset // Offset to the PE header. 
-pe_header: __EFI_PE_HEADER -#else - .long 0 // reserved -#endif __INIT @@ -104,7 +89,7 @@ pe_header: */ SYM_CODE_START(primary_entry) bl preserve_boot_args - bl el2_setup // Drop to EL1, w0=cpu_boot_mode + bl init_kernel_el // w0=cpu_boot_mode adrp x23, __PHYS_OFFSET and x23, x23, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0 bl set_cpu_boot_mode_flag @@ -482,24 +467,33 @@ EXPORT_SYMBOL(kimage_vaddr) .section ".idmap.text","awx" /* - * If we're fortunate enough to boot at EL2, ensure that the world is - * sane before dropping to EL1. + * Starting from EL2 or EL1, configure the CPU to execute at the highest + * reachable EL supported by the kernel in a chosen default state. If dropping + * from EL2 to EL1, configure EL2 before configuring EL1. + * + * Since we cannot always rely on ERET synchronizing writes to sysregs (e.g. if + * SCTLR_ELx.EOS is clear), we place an ISB prior to ERET. * * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if * booted in EL1 or EL2 respectively. */ -SYM_FUNC_START(el2_setup) - msr SPsel, #1 // We want to use SP_EL{1,2} +SYM_FUNC_START(init_kernel_el) mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 - b.eq 1f - mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1) + b.eq init_el2 + +SYM_INNER_LABEL(init_el1, SYM_L_LOCAL) + mov_q x0, INIT_SCTLR_EL1_MMU_OFF msr sctlr_el1, x0 - mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 isb - ret + mov_q x0, INIT_PSTATE_EL1 + msr spsr_el1, x0 + msr elr_el1, lr + mov w0, #BOOT_CPU_MODE_EL1 + eret -1: mov_q x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) +SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) + mov_q x0, INIT_SCTLR_EL2_MMU_OFF msr sctlr_el2, x0 #ifdef CONFIG_ARM64_VHE @@ -608,9 +602,12 @@ set_hcr: cbz x2, install_el2_stub - mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 isb - ret + mov_q x0, INIT_PSTATE_EL2 + msr spsr_el2, x0 + msr elr_el2, lr + mov w0, #BOOT_CPU_MODE_EL2 + eret SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL) /* @@ -620,7 +617,7 @@ SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL) * requires no configuration, and all non-hyp-specific EL2 setup * will be done via the _EL1 system register aliases in __cpu_setup. */ - mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1) + mov_q x0, INIT_SCTLR_EL1_MMU_OFF msr sctlr_el1, x0 /* Coprocessor traps. */ @@ -642,14 +639,13 @@ SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL) 7: adr_l x0, __hyp_stub_vectors msr vbar_el2, x0 - /* spsr */ - mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ - PSR_MODE_EL1h) + isb + mov x0, #INIT_PSTATE_EL1 msr spsr_el2, x0 msr elr_el2, lr - mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 + mov w0, #BOOT_CPU_MODE_EL2 eret -SYM_FUNC_END(el2_setup) +SYM_FUNC_END(init_kernel_el) /* * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed @@ -699,7 +695,7 @@ SYM_DATA_END(__early_cpu_boot_status) * cores are held until we're ready for them to initialise. */ SYM_FUNC_START(secondary_holding_pen) - bl el2_setup // Drop to EL1, w0=cpu_boot_mode + bl init_kernel_el // w0=cpu_boot_mode bl set_cpu_boot_mode_flag mrs x0, mpidr_el1 mov_q x1, MPIDR_HWID_BITMASK @@ -717,7 +713,7 @@ SYM_FUNC_END(secondary_holding_pen) * be used where CPUs are brought online dynamically by the kernel. 
*/ SYM_FUNC_START(secondary_entry) - bl el2_setup // Drop to EL1 + bl init_kernel_el // w0=cpu_boot_mode bl set_cpu_boot_mode_flag b secondary_startup SYM_FUNC_END(secondary_entry) diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 61684a500914..c615b285ff5b 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -87,7 +87,6 @@ KVM_NVHE_ALIAS(__icache_flags); /* Kernel symbols needed for cpus_have_final/const_caps checks. */ KVM_NVHE_ALIAS(arm64_const_caps_ready); KVM_NVHE_ALIAS(cpu_hwcap_keys); -KVM_NVHE_ALIAS(cpu_hwcaps); /* Static keys which are set if a vGIC trap should be handled in hyp. */ KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 60456a62da11..dfb1feab867d 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -17,6 +17,7 @@ #include <linux/init.h> #include <linux/irqchip.h> #include <linux/kprobes.h> +#include <linux/scs.h> #include <linux/seq_file.h> #include <linux/vmalloc.h> #include <asm/daifflags.h> @@ -27,6 +28,25 @@ DEFINE_PER_CPU(struct nmi_ctx, nmi_contexts); DEFINE_PER_CPU(unsigned long *, irq_stack_ptr); + +DECLARE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr); + +#ifdef CONFIG_SHADOW_CALL_STACK +DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr); +#endif + +static void init_irq_scs(void) +{ + int cpu; + + if (!IS_ENABLED(CONFIG_SHADOW_CALL_STACK)) + return; + + for_each_possible_cpu(cpu) + per_cpu(irq_shadow_call_stack_ptr, cpu) = + scs_alloc(cpu_to_node(cpu)); +} + #ifdef CONFIG_VMAP_STACK static void init_irq_stacks(void) { @@ -54,6 +74,7 @@ static void init_irq_stacks(void) void __init init_IRQ(void) { init_irq_stacks(); + init_irq_scs(); irqchip_init(); if (!handle_arch_irq) panic("No interrupt controller found."); diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index b181e0544b79..0921aa1520b0 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -50,10 +50,16 @@ static __init u64 get_kaslr_seed(void *fdt) return ret; } -static __init const u8 *kaslr_get_cmdline(void *fdt) +static __init bool cmdline_contains_nokaslr(const u8 *cmdline) { - static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE; + const u8 *str; + str = strstr(cmdline, "nokaslr"); + return str == cmdline || (str > cmdline && *(str - 1) == ' '); +} + +static __init bool is_kaslr_disabled_cmdline(void *fdt) +{ if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) { int node; const u8 *prop; @@ -65,10 +71,17 @@ static __init const u8 *kaslr_get_cmdline(void *fdt) prop = fdt_getprop(fdt, node, "bootargs", NULL); if (!prop) goto out; - return prop; + + if (cmdline_contains_nokaslr(prop)) + return true; + + if (IS_ENABLED(CONFIG_CMDLINE_EXTEND)) + goto out; + + return false; } out: - return default_cmdline; + return cmdline_contains_nokaslr(CONFIG_CMDLINE); } /* @@ -83,7 +96,6 @@ u64 __init kaslr_early_init(u64 dt_phys) { void *fdt; u64 seed, offset, mask, module_range; - const u8 *cmdline, *str; unsigned long raw; int size; @@ -115,9 +127,7 @@ u64 __init kaslr_early_init(u64 dt_phys) * Check if 'nokaslr' appears on the command line, and * return 0 if that is the case. 
*/ - cmdline = kaslr_get_cmdline(fdt); - str = strstr(cmdline, "nokaslr"); - if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) { + if (is_kaslr_disabled_cmdline(fdt)) { kaslr_status = KASLR_DISABLED_CMDLINE; return 0; } diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 3605f77ad4df..38bb07eff872 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -23,6 +23,8 @@ #include <linux/platform_device.h> #include <linux/sched_clock.h> #include <linux/smp.h> +#include <linux/nmi.h> +#include <linux/cpufreq.h> /* ARMv8 Cortex-A53 specific event types. */ #define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2 @@ -1248,10 +1250,21 @@ static struct platform_driver armv8_pmu_driver = { static int __init armv8_pmu_driver_init(void) { + int ret; + if (acpi_disabled) - return platform_driver_register(&armv8_pmu_driver); + ret = platform_driver_register(&armv8_pmu_driver); else - return arm_pmu_acpi_probe(armv8_pmuv3_init); + ret = arm_pmu_acpi_probe(armv8_pmuv3_init); + + /* + * Try to re-initialize lockup detector after PMU init in + * case PMU events are triggered via NMIs. + */ + if (ret == 0 && arm_pmu_irq_is_nmi()) + lockup_detector_init(); + + return ret; } device_initcall(armv8_pmu_driver_init) @@ -1309,3 +1322,27 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->cap_user_time_zero = 1; userpg->cap_user_time_short = 1; } + +#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF +/* + * Safe maximum CPU frequency in case a particular platform doesn't implement + * cpufreq driver. Although, architecture doesn't put any restrictions on + * maximum frequency but 5 GHz seems to be safe maximum given the available + * Arm CPUs in the market which are clocked much less than 5 GHz. On the other + * hand, we can't make it much higher as it would lead to a large hard-lockup + * detection timeout on parts which are running slower (eg. 1GHz on + * Developerbox) and doesn't possess a cpufreq driver. 
+ */ +#define SAFE_MAX_CPU_FREQ 5000000000UL // 5 GHz +u64 hw_nmi_get_sample_period(int watchdog_thresh) +{ + unsigned int cpu = smp_processor_id(); + unsigned long max_cpu_freq; + + max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL; + if (!max_cpu_freq) + max_cpu_freq = SAFE_MAX_CPU_FREQ; + + return (u64)max_cpu_freq * watchdog_thresh; +} +#endif diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index f11a1a1f7026..89c64ada8732 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -34,7 +34,7 @@ DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); static void __kprobes -post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *); +post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *); static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { @@ -68,7 +68,7 @@ static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs) p->ainsn.api.handler((u32)p->opcode, (long)p->addr, regs); /* single step simulated, now go for post processing */ - post_kprobe_handler(kcb, regs); + post_kprobe_handler(p, kcb, regs); } int __kprobes arch_prepare_kprobe(struct kprobe *p) @@ -177,19 +177,6 @@ static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb, regs->pstate |= kcb->saved_irqflag; } -static void __kprobes -set_ss_context(struct kprobe_ctlblk *kcb, unsigned long addr) -{ - kcb->ss_ctx.ss_pending = true; - kcb->ss_ctx.match_addr = addr + sizeof(kprobe_opcode_t); -} - -static void __kprobes clear_ss_context(struct kprobe_ctlblk *kcb) -{ - kcb->ss_ctx.ss_pending = false; - kcb->ss_ctx.match_addr = 0; -} - static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter) @@ -209,7 +196,6 @@ static void __kprobes setup_singlestep(struct kprobe *p, /* prepare for single stepping */ slot = (unsigned long)p->ainsn.api.insn; - set_ss_context(kcb, slot); /* mark pending ss */ kprobes_save_local_irqflag(kcb, regs); instruction_pointer_set(regs, slot); } else { @@ -243,13 +229,8 @@ static int __kprobes reenter_kprobe(struct kprobe *p, } static void __kprobes -post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs) +post_kprobe_handler(struct kprobe *cur, struct kprobe_ctlblk *kcb, struct pt_regs *regs) { - struct kprobe *cur = kprobe_running(); - - if (!cur) - return; - /* return addr restore if non-branching insn */ if (cur->ainsn.api.restore != 0) instruction_pointer_set(regs, cur->ainsn.api.restore); @@ -365,32 +346,22 @@ static void __kprobes kprobe_handler(struct pt_regs *regs) } static int __kprobes -kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr) -{ - if ((kcb->ss_ctx.ss_pending) - && (kcb->ss_ctx.match_addr == addr)) { - clear_ss_context(kcb); /* clear pending ss */ - return DBG_HOOK_HANDLED; - } - /* not ours, kprobes should ignore it */ - return DBG_HOOK_ERROR; -} - -static int __kprobes kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned int esr) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - int retval; - - /* return error if this is not our step */ - retval = kprobe_ss_hit(kcb, instruction_pointer(regs)); + unsigned long addr = instruction_pointer(regs); + struct kprobe *cur = kprobe_running(); - if (retval == DBG_HOOK_HANDLED) { + if (cur && (kcb->kprobe_status == KPROBE_HIT_SS) + && ((unsigned long)&cur->ainsn.api.insn[1] == addr)) { kprobes_restore_local_irqflag(kcb, regs); - post_kprobe_handler(kcb, regs); 
+ post_kprobe_handler(cur, kcb, regs); + + return DBG_HOOK_HANDLED; } - return retval; + /* not ours, kprobes should ignore it */ + return DBG_HOOK_ERROR; } static struct break_hook kprobes_break_ss_hook = { diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 0c65a91f8f53..3834cfafbbc2 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -422,16 +422,15 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, if (clone_flags & CLONE_SETTLS) p->thread.uw.tp_value = tls; } else { + /* + * A kthread has no context to ERET to, so ensure any buggy + * ERET is treated as an illegal exception return. + * + * When a user task is created from a kthread, childregs will + * be initialized by start_thread() or start_compat_thread(). + */ memset(childregs, 0, sizeof(struct pt_regs)); - childregs->pstate = PSR_MODE_EL1h; - if (IS_ENABLED(CONFIG_ARM64_UAO) && - cpus_have_const_cap(ARM64_HAS_UAO)) - childregs->pstate |= PSR_UAO_BIT; - - spectre_v4_enable_task_mitigation(p); - - if (system_uses_irq_prio_masking()) - childregs->pmr_save = GIC_PRIO_IRQON; + childregs->pstate = PSR_MODE_EL1h | PSR_IL_BIT; p->thread.cpu_context.x19 = stack_start; p->thread.cpu_context.x20 = stk_sz; @@ -461,17 +460,6 @@ static void tls_thread_switch(struct task_struct *next) write_sysreg(*task_user_tls(next), tpidr_el0); } -/* Restore the UAO state depending on next's addr_limit */ -void uao_thread_switch(struct task_struct *next) -{ - if (IS_ENABLED(CONFIG_ARM64_UAO)) { - if (task_thread_info(next)->addr_limit == KERNEL_DS) - asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO)); - else - asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO)); - } -} - /* * Force SSBS state on context-switch, since it may be lost after migrating * from a CPU which treats the bit as RES0 in a heterogeneous system. 
@@ -554,7 +542,6 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, hw_breakpoint_thread_switch(next); contextidr_thread_switch(next); entry_task_switch(next); - uao_thread_switch(next); ssbs_thread_switch(next); erratum_1418040_thread_switch(prev, next); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index f6e4e3737405..4c25c008504f 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -24,6 +24,7 @@ #include <linux/prctl.h> #include <linux/sched/task_stack.h> +#include <asm/insn.h> #include <asm/spectre.h> #include <asm/traps.h> @@ -538,12 +539,12 @@ static enum mitigation_state spectre_v4_enable_hw_mitigation(void) if (spectre_v4_mitigations_off()) { sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); - asm volatile(SET_PSTATE_SSBS(1)); + set_pstate_ssbs(1); return SPECTRE_VULNERABLE; } /* SCTLR_EL1.DSSBS was initialised to 0 during boot */ - asm volatile(SET_PSTATE_SSBS(0)); + set_pstate_ssbs(0); return SPECTRE_MITIGATED; } diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index f49b349e16a3..8ac487c84e37 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -192,14 +192,11 @@ static void ptrace_hbptriggered(struct perf_event *bp, break; } } - arm64_force_sig_ptrace_errno_trap(si_errno, - (void __user *)bkpt->trigger, + arm64_force_sig_ptrace_errno_trap(si_errno, bkpt->trigger, desc); } #endif - arm64_force_sig_fault(SIGTRAP, TRAP_HWBKPT, - (void __user *)(bkpt->trigger), - desc); + arm64_force_sig_fault(SIGTRAP, TRAP_HWBKPT, bkpt->trigger, desc); } /* diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c deleted file mode 100644 index e8f7ff45dd8f..000000000000 --- a/arch/arm64/kernel/scs.c +++ /dev/null @@ -1,16 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Shadow Call Stack support. 
- * - * Copyright (C) 2019 Google LLC - */ - -#include <linux/percpu.h> -#include <linux/scs.h> - -DEFINE_SCS(irq_shadow_call_stack); - -#ifdef CONFIG_ARM_SDE_INTERFACE -DEFINE_SCS(sdei_shadow_call_stack_normal); -DEFINE_SCS(sdei_shadow_call_stack_critical); -#endif diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 793c46d6a447..2c7ca449dd51 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -7,6 +7,7 @@ #include <linux/hardirq.h> #include <linux/irqflags.h> #include <linux/sched/task_stack.h> +#include <linux/scs.h> #include <linux/uaccess.h> #include <asm/alternative.h> @@ -38,6 +39,14 @@ DEFINE_PER_CPU(unsigned long *, sdei_stack_normal_ptr); DEFINE_PER_CPU(unsigned long *, sdei_stack_critical_ptr); #endif +DECLARE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr); +DECLARE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr); + +#ifdef CONFIG_SHADOW_CALL_STACK +DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr); +DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr); +#endif + static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu) { unsigned long *p; @@ -53,6 +62,9 @@ static void free_sdei_stacks(void) { int cpu; + if (!IS_ENABLED(CONFIG_VMAP_STACK)) + return; + for_each_possible_cpu(cpu) { _free_sdei_stack(&sdei_stack_normal_ptr, cpu); _free_sdei_stack(&sdei_stack_critical_ptr, cpu); @@ -76,6 +88,9 @@ static int init_sdei_stacks(void) int cpu; int err = 0; + if (!IS_ENABLED(CONFIG_VMAP_STACK)) + return 0; + for_each_possible_cpu(cpu) { err = _init_sdei_stack(&sdei_stack_normal_ptr, cpu); if (err) @@ -91,6 +106,62 @@ static int init_sdei_stacks(void) return err; } +static void _free_sdei_scs(unsigned long * __percpu *ptr, int cpu) +{ + void *s; + + s = per_cpu(*ptr, cpu); + if (s) { + per_cpu(*ptr, cpu) = NULL; + scs_free(s); + } +} + +static void free_sdei_scs(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + _free_sdei_scs(&sdei_shadow_call_stack_normal_ptr, cpu); + _free_sdei_scs(&sdei_shadow_call_stack_critical_ptr, cpu); + } +} + +static int _init_sdei_scs(unsigned long * __percpu *ptr, int cpu) +{ + void *s; + + s = scs_alloc(cpu_to_node(cpu)); + if (!s) + return -ENOMEM; + per_cpu(*ptr, cpu) = s; + + return 0; +} + +static int init_sdei_scs(void) +{ + int cpu; + int err = 0; + + if (!IS_ENABLED(CONFIG_SHADOW_CALL_STACK)) + return 0; + + for_each_possible_cpu(cpu) { + err = _init_sdei_scs(&sdei_shadow_call_stack_normal_ptr, cpu); + if (err) + break; + err = _init_sdei_scs(&sdei_shadow_call_stack_critical_ptr, cpu); + if (err) + break; + } + + if (err) + free_sdei_scs(); + + return err; +} + static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info) { unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); @@ -131,13 +202,14 @@ unsigned long sdei_arch_get_entry_point(int conduit) */ if (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) { pr_err("Not supported on this hardware/boot configuration\n"); - return 0; + goto out_err; } - if (IS_ENABLED(CONFIG_VMAP_STACK)) { - if (init_sdei_stacks()) - return 0; - } + if (init_sdei_stacks()) + goto out_err; + + if (init_sdei_scs()) + goto out_err_free_stacks; sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? 
SDEI_EXIT_HVC : SDEI_EXIT_SMC; @@ -152,6 +224,10 @@ unsigned long sdei_arch_get_entry_point(int conduit) #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ return (unsigned long)__sdei_asm_handler; +out_err_free_stacks: + free_sdei_stacks(); +out_err: + return 0; } /* @@ -179,12 +255,6 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs, sdei_api_event_context(i, ®s->regs[i]); } - /* - * We didn't take an exception to get here, set PAN. UAO will be cleared - * by sdei_event_handler()s force_uaccess_begin() call. - */ - __uaccess_enable_hw_pan(); - err = sdei_event_handler(regs, arg); if (err) return SDEI_EV_FAILED; @@ -223,12 +293,39 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs, return vbar + 0x480; } +static void __kprobes notrace __sdei_pstate_entry(void) +{ + /* + * The original SDEI spec (ARM DEN 0054A) can be read ambiguously as to + * whether PSTATE bits are inherited unchanged or generated from + * scratch, and the TF-A implementation always clears PAN and always + * clears UAO. There are no other known implementations. + * + * Subsequent revisions (ARM DEN 0054B) follow the usual rules for how + * PSTATE is modified upon architectural exceptions, and so PAN is + * either inherited or set per SCTLR_ELx.SPAN, and UAO is always + * cleared. + * + * We must explicitly reset PAN to the expected state, including + * clearing it when the host isn't using it, in case a VM had it set. + */ + if (system_uses_hw_pan()) + set_pstate_pan(1); + else if (cpu_has_pan()) + set_pstate_pan(0); +} asmlinkage noinstr unsigned long __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg) { unsigned long ret; + /* + * We didn't take an exception to get here, so the HW hasn't + * set/cleared bits in PSTATE that we may rely on. Initialize PAN. + */ + __sdei_pstate_entry(); + arm64_enter_nmi(regs); ret = _sdei_handler(regs, arg); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 133257ffd859..1a57a76e1cc2 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -206,7 +206,7 @@ static void __init request_standard_resources(void) unsigned long i = 0; size_t res_size; - kernel_code.start = __pa_symbol(_text); + kernel_code.start = __pa_symbol(_stext); kernel_code.end = __pa_symbol(__init_begin - 1); kernel_data.start = __pa_symbol(_sdata); kernel_data.end = __pa_symbol(_end - 1); @@ -283,7 +283,7 @@ u64 cpu_logical_map(int cpu) void __init __no_sanitize_address setup_arch(char **cmdline_p) { - init_mm.start_code = (unsigned long) _text; + init_mm.start_code = (unsigned long) _stext; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; init_mm.brk = (unsigned long) _end; @@ -366,7 +366,7 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) * faults in case uaccess_enable() is inadvertently called by the init * thread. 
*/ - init_task.thread_info.ttbr0 = __pa_symbol(empty_zero_page); + init_task.thread_info.ttbr0 = __pa_symbol(reserved_pg_dir); #endif if (boot_args[1] || boot_args[2] || boot_args[3]) { diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index a8184cad8890..af5c6c6638f7 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -922,9 +922,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, trace_hardirqs_off(); do { - /* Check valid user FS if needed */ - addr_limit_user_check(); - if (thread_flags & _TIF_NEED_RESCHED) { /* Unmask Debug and SError for the next task */ local_daif_restore(DAIF_PROCCTX_NOIRQ); diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index ba40d57757d6..4be7f7eed875 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -99,7 +99,7 @@ SYM_FUNC_END(__cpu_suspend_enter) .pushsection ".idmap.text", "awx" SYM_CODE_START(cpu_resume) - bl el2_setup // if in EL2 drop to EL1 cleanly + bl init_kernel_el bl __cpu_setup /* enable the MMU early - so we can access sleep_save_stash by va */ adrp x1, swapper_pg_dir diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 18e9727d3f64..2499b895efea 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -787,14 +787,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus) } static const char *ipi_types[NR_IPI] __tracepoint_string = { -#define S(x,s) [x] = s - S(IPI_RESCHEDULE, "Rescheduling interrupts"), - S(IPI_CALL_FUNC, "Function call interrupts"), - S(IPI_CPU_STOP, "CPU stop interrupts"), - S(IPI_CPU_CRASH_STOP, "CPU stop (for crash dump) interrupts"), - S(IPI_TIMER, "Timer broadcast interrupts"), - S(IPI_IRQ_WORK, "IRQ work interrupts"), - S(IPI_WAKEUP, "CPU wake-up interrupts"), + [IPI_RESCHEDULE] = "Rescheduling interrupts", + [IPI_CALL_FUNC] = "Function call interrupts", + [IPI_CPU_STOP] = "CPU stop interrupts", + [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", + [IPI_TIMER] = "Timer broadcast interrupts", + [IPI_IRQ_WORK] = "IRQ work interrupts", + [IPI_WAKEUP] = "CPU wake-up interrupts", }; static void smp_cross_call(const struct cpumask *target, unsigned int ipinr); diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 96cd347c7a46..a67b37a7a47e 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -58,7 +58,6 @@ void notrace __cpu_suspend_exit(void) * features that might not have been set correctly. */ __uaccess_enable_hw_pan(); - uao_thread_switch(current); /* * Restore HW breakpoint registers to sane values diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 3c18c2454089..265fe3eb1069 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -68,7 +68,7 @@ do_compat_cache_op(unsigned long start, unsigned long end, int flags) */ long compat_arm_syscall(struct pt_regs *regs, int scno) { - void __user *addr; + unsigned long addr; switch (scno) { /* @@ -111,8 +111,7 @@ long compat_arm_syscall(struct pt_regs *regs, int scno) break; } - addr = (void __user *)instruction_pointer(regs) - - (compat_thumb_mode(regs) ? 2 : 4); + addr = instruction_pointer(regs) - (compat_thumb_mode(regs) ? 
2 : 4); arm64_notify_die("Oops - bad compat syscall(2)", regs, SIGILL, ILL_ILLTRP, addr, scno); diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index f8f758e4a306..f61e9d8cc55a 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -122,7 +122,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, cortex_a76_erratum_1463225_svc_handler(); local_daif_restore(DAIF_PROCCTX); - if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) { + if (flags & _TIF_MTE_ASYNC_FAULT) { /* * Process the asynchronous tag check fault before the actual * syscall. do_notify_resume() will send a signal to userspace diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 543c67cae02f..b8026ec684ba 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -124,6 +124,12 @@ int __init parse_acpi_topology(void) #endif #ifdef CONFIG_ARM64_AMU_EXTN +#define read_corecnt() read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0) +#define read_constcnt() read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0) +#else +#define read_corecnt() (0UL) +#define read_constcnt() (0UL) +#endif #undef pr_fmt #define pr_fmt(fmt) "AMU: " fmt @@ -133,54 +139,58 @@ static DEFINE_PER_CPU(u64, arch_const_cycles_prev); static DEFINE_PER_CPU(u64, arch_core_cycles_prev); static cpumask_var_t amu_fie_cpus; -/* Initialize counter reference per-cpu variables for the current CPU */ -void init_cpu_freq_invariance_counters(void) +void update_freq_counters_refs(void) { - this_cpu_write(arch_core_cycles_prev, - read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0)); - this_cpu_write(arch_const_cycles_prev, - read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0)); + this_cpu_write(arch_core_cycles_prev, read_corecnt()); + this_cpu_write(arch_const_cycles_prev, read_constcnt()); } -static int validate_cpu_freq_invariance_counters(int cpu) +static inline bool freq_counters_valid(int cpu) { - u64 max_freq_hz, ratio; + if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask)) + return false; if (!cpu_has_amu_feat(cpu)) { pr_debug("CPU%d: counters are not supported.\n", cpu); - return -EINVAL; + return false; } if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) || !per_cpu(arch_core_cycles_prev, cpu))) { pr_debug("CPU%d: cycle counters are not enabled.\n", cpu); - return -EINVAL; + return false; } - /* Convert maximum frequency from KHz to Hz and validate */ - max_freq_hz = cpufreq_get_hw_max_freq(cpu) * 1000; - if (unlikely(!max_freq_hz)) { - pr_debug("CPU%d: invalid maximum frequency.\n", cpu); + return true; +} + +static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate) +{ + u64 ratio; + + if (unlikely(!max_rate || !ref_rate)) { + pr_debug("CPU%d: invalid maximum or reference frequency.\n", + cpu); return -EINVAL; } /* * Pre-compute the fixed ratio between the frequency of the constant - * counter and the maximum frequency of the CPU. + * reference counter and the maximum frequency of the CPU. * - * const_freq - * arch_max_freq_scale = ---------------- * SCHED_CAPACITY_SCALE² - * cpuinfo_max_freq + * ref_rate + * arch_max_freq_scale = ---------- * SCHED_CAPACITY_SCALE² + * max_rate * * We use a factor of 2 * SCHED_CAPACITY_SHIFT -> SCHED_CAPACITY_SCALE² * in order to ensure a good resolution for arch_max_freq_scale for - * very low arch timer frequencies (down to the KHz range which should + * very low reference frequencies (down to the KHz range which should * be unlikely). 
*/ - ratio = (u64)arch_timer_get_rate() << (2 * SCHED_CAPACITY_SHIFT); - ratio = div64_u64(ratio, max_freq_hz); + ratio = ref_rate << (2 * SCHED_CAPACITY_SHIFT); + ratio = div64_u64(ratio, max_rate); if (!ratio) { - WARN_ONCE(1, "System timer frequency too low.\n"); + WARN_ONCE(1, "Reference frequency too low.\n"); return -EINVAL; } @@ -227,8 +237,12 @@ static int __init init_amu_fie(void) } for_each_present_cpu(cpu) { - if (validate_cpu_freq_invariance_counters(cpu)) + if (!freq_counters_valid(cpu) || + freq_inv_set_max_ratio(cpu, + cpufreq_get_hw_max_freq(cpu) * 1000, + arch_timer_get_rate())) continue; + cpumask_set_cpu(cpu, valid_cpus); have_policy |= enable_policy_freq_counters(cpu, valid_cpus); } @@ -280,11 +294,14 @@ void topology_scale_freq_tick(void) if (!cpumask_test_cpu(cpu, amu_fie_cpus)) return; - const_cnt = read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0); - core_cnt = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0); prev_const_cnt = this_cpu_read(arch_const_cycles_prev); prev_core_cnt = this_cpu_read(arch_core_cycles_prev); + update_freq_counters_refs(); + + const_cnt = this_cpu_read(arch_const_cycles_prev); + core_cnt = this_cpu_read(arch_core_cycles_prev); + if (unlikely(core_cnt <= prev_core_cnt || const_cnt <= prev_const_cnt)) goto store_and_exit; @@ -309,4 +326,71 @@ store_and_exit: this_cpu_write(arch_core_cycles_prev, core_cnt); this_cpu_write(arch_const_cycles_prev, const_cnt); } -#endif /* CONFIG_ARM64_AMU_EXTN */ + +#ifdef CONFIG_ACPI_CPPC_LIB +#include <acpi/cppc_acpi.h> + +static void cpu_read_corecnt(void *val) +{ + *(u64 *)val = read_corecnt(); +} + +static void cpu_read_constcnt(void *val) +{ + *(u64 *)val = read_constcnt(); +} + +static inline +int counters_read_on_cpu(int cpu, smp_call_func_t func, u64 *val) +{ + /* + * Abort call on counterless CPU or when interrupts are + * disabled - can lead to deadlock in smp sync call. + */ + if (!cpu_has_amu_feat(cpu)) + return -EOPNOTSUPP; + + if (WARN_ON_ONCE(irqs_disabled())) + return -EPERM; + + smp_call_function_single(cpu, func, val, 1); + + return 0; +} + +/* + * Refer to drivers/acpi/cppc_acpi.c for the description of the functions + * below. 
+ */ +bool cpc_ffh_supported(void) +{ + return freq_counters_valid(get_cpu_with_amu_feat()); +} + +int cpc_read_ffh(int cpu, struct cpc_reg *reg, u64 *val) +{ + int ret = -EOPNOTSUPP; + + switch ((u64)reg->address) { + case 0x0: + ret = counters_read_on_cpu(cpu, cpu_read_corecnt, val); + break; + case 0x1: + ret = counters_read_on_cpu(cpu, cpu_read_constcnt, val); + break; + } + + if (!ret) { + *val &= GENMASK_ULL(reg->bit_offset + reg->bit_width - 1, + reg->bit_offset); + *val >>= reg->bit_offset; + } + + return ret; +} + +int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_ACPI_CPPC_LIB */ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 2059d8f43f55..08156be75569 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -171,32 +171,32 @@ static void arm64_show_signal(int signo, const char *str) __show_regs(regs); } -void arm64_force_sig_fault(int signo, int code, void __user *addr, +void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str) { arm64_show_signal(signo, str); if (signo == SIGKILL) force_sig(SIGKILL); else - force_sig_fault(signo, code, addr); + force_sig_fault(signo, code, (void __user *)far); } -void arm64_force_sig_mceerr(int code, void __user *addr, short lsb, +void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str) { arm64_show_signal(SIGBUS, str); - force_sig_mceerr(code, addr, lsb); + force_sig_mceerr(code, (void __user *)far, lsb); } -void arm64_force_sig_ptrace_errno_trap(int errno, void __user *addr, +void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str) { arm64_show_signal(SIGTRAP, str); - force_sig_ptrace_errno_trap(errno, addr); + force_sig_ptrace_errno_trap(errno, (void __user *)far); } void arm64_notify_die(const char *str, struct pt_regs *regs, - int signo, int sicode, void __user *addr, + int signo, int sicode, unsigned long far, int err) { if (user_mode(regs)) { @@ -204,7 +204,7 @@ void arm64_notify_die(const char *str, struct pt_regs *regs, current->thread.fault_address = 0; current->thread.fault_code = err; - arm64_force_sig_fault(signo, sicode, addr, str); + arm64_force_sig_fault(signo, sicode, far, str); } else { die(str, regs, err); } @@ -375,7 +375,7 @@ void force_signal_inject(int signal, int code, unsigned long address, unsigned i signal = SIGKILL; } - arm64_notify_die(desc, regs, signal, code, (void __user *)address, err); + arm64_notify_die(desc, regs, signal, code, address, err); } /* @@ -386,7 +386,7 @@ void arm64_notify_segfault(unsigned long addr) int code; mmap_read_lock(current->mm); - if (find_vma(current->mm, addr) == NULL) + if (find_vma(current->mm, untagged_addr(addr)) == NULL) code = SEGV_MAPERR; else code = SEGV_ACCERR; @@ -449,12 +449,13 @@ NOKPROBE_SYMBOL(do_ptrauth_fault); static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) { - unsigned long address; + unsigned long tagged_address, address; int rt = ESR_ELx_SYS64_ISS_RT(esr); int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; int ret = 0; - address = untagged_addr(pt_regs_read_reg(regs, rt)); + tagged_address = pt_regs_read_reg(regs, rt); + address = untagged_addr(tagged_address); switch (crm) { case ESR_ELx_SYS64_ISS_CRM_DC_CVAU: /* DC CVAU, gets promoted */ @@ -481,7 +482,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) } if (ret) - arm64_notify_segfault(address); + arm64_notify_segfault(tagged_address); else 
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } @@ -775,7 +776,7 @@ asmlinkage void notrace bad_mode(struct pt_regs *regs, int reason, unsigned int */ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) { - void __user *pc = (void __user *)instruction_pointer(regs); + unsigned long pc = instruction_pointer(regs); current->thread.fault_address = 0; current->thread.fault_code = esr; diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index d65f52264aba..a8f8e409e2bf 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -28,7 +28,7 @@ ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ $(btildflags-y) -T ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 -ccflags-y += -DDISABLE_BRANCH_PROFILING +ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS) KASAN_SANITIZE := n diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 79280c53b9a6..a1e0f91e6cea 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -48,7 +48,7 @@ cc32-as-instr = $(call try-run,\ # As a result we set our own flags here. # KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile -VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include) +VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include) VDSO_CPPFLAGS += $(LINUXINCLUDE) # Common C and assembly flags diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 1bda604f4c70..5d5857c5b025 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -121,7 +121,7 @@ SECTIONS _text = .; HEAD_TEXT } - .text : { /* Real text segment */ + .text : ALIGN(SEGMENT_ALIGN) { /* Real text segment */ _stext = .; /* Text and read-only data */ IRQENTRY_TEXT SOFTIRQENTRY_TEXT @@ -164,13 +164,11 @@ SECTIONS . += PAGE_SIZE; #endif -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - reserved_ttbr0 = .; - . += RESERVED_TTBR0_SIZE; -#endif + reserved_pg_dir = .; + . += PAGE_SIZE; + swapper_pg_dir = .; . += PAGE_SIZE; - swapper_pg_end = .; . = ALIGN(SEGMENT_ALIGN); __init_begin = .; @@ -201,7 +199,7 @@ SECTIONS INIT_CALLS CON_INITCALL INIT_RAM_FS - *(.init.rodata.* .init.bss) /* from the EFI stub */ + *(.init.altinstructions .init.bss) /* from the EFI stub */ } .exit.data : { EXIT_DATA |