diff options
Diffstat (limited to 'arch/s390')
89 files changed, 1879 insertions, 1533 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c72874f09741..c1ff874e6c2e 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -58,6 +58,7 @@ config S390 # Note: keep this list sorted alphabetically # imply IMA_SECURE_AND_OR_TRUSTED_BOOT + select ARCH_32BIT_USTAT_F_TINODE select ARCH_BINFMT_ELF_STATE select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX @@ -123,11 +124,13 @@ config S390 select GENERIC_ALLOCATOR select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES + select GENERIC_ENTRY select GENERIC_FIND_FIRST_BIT select GENERIC_GETTIMEOFDAY select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL + select GENERIC_VDSO_TIME_NS select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL @@ -174,7 +177,6 @@ config S390 select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_NOP_MCOUNT - select HAVE_OPROFILE select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_PERF_REGS @@ -182,6 +184,7 @@ config S390 select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE select HAVE_RSEQ + select HAVE_SOFTIRQ_ON_OWN_STACK select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING select HAVE_VIRT_CPU_ACCOUNTING_IDLE @@ -426,7 +429,6 @@ config 64BIT config COMPAT def_bool y prompt "Kernel support for 31 bit emulation" - select COMPAT_BINFMT_ELF if BINFMT_ELF select ARCH_WANT_OLD_COMPAT_IPC select COMPAT_OLD_SIGACTION select HAVE_UID16 diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index 6bfaceebbbc0..ef96c25fa921 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -6,10 +6,12 @@ config TRACE_IRQFLAGS_SUPPORT config EARLY_PRINTK def_bool y -config DEBUG_USER_ASCE - bool "Debug User ASCE" +config DEBUG_ENTRY + bool "Debug low-level entry code" + depends on DEBUG_KERNEL help - Check on exit to user space that address space control - elements are setup correctly. + This option enables sanity checks in s390 low-level entry code. + Some of these sanity checks may slow down kernel entries and + exits or otherwise impact performance. If unsure, say N. diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 8db267d2a543..e443ed9947bd 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -134,9 +134,6 @@ core-y += arch/s390/ libs-y += arch/s390/lib/ drivers-y += drivers/s390/ -# must be linked after kernel -drivers-$(CONFIG_OPROFILE) += arch/s390/oprofile/ - boot := arch/s390/boot syscalls := arch/s390/kernel/syscalls tools := arch/s390/tools diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index c4f6ff98a612..02056b024091 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -40,6 +40,7 @@ CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_LIVEPATCH=y +CONFIG_MARCH_ZEC12=y CONFIG_TUNE_ZEC12=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y @@ -57,7 +58,6 @@ CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_KVM=m CONFIG_S390_UNWIND_SELFTEST=y -CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_STATIC_KEYS_SELFTEST=y @@ -71,7 +71,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_MODULE_SIG_SHA256=y -CONFIG_UNUSED_SYMBOLS=y CONFIG_BLK_DEV_INTEGRITY=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y @@ -177,13 +176,17 @@ CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=y CONFIG_NFT_CT=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m +CONFIG_NFT_OBJREF=m +CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m +CONFIG_NFT_FIB_INET=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_AUDIT=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -275,6 +278,7 @@ CONFIG_IP_VS_NQ=m CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m CONFIG_NF_TABLES_IPV4=y +CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m @@ -295,6 +299,7 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_TABLES_IPV6=y +CONFIG_NFT_FIB_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -630,7 +635,6 @@ CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y -CONFIG_TMPFS_INODE64=y CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m @@ -792,6 +796,8 @@ CONFIG_DEBUG_OBJECTS_RCU_HEAD=y CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y CONFIG_SLUB_DEBUG_ON=y CONFIG_SLUB_STATS=y +CONFIG_DEBUG_KMEMLEAK=y +CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_VM=y CONFIG_DEBUG_VM_VMACACHE=y @@ -832,7 +838,6 @@ CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y CONFIG_FTRACE_STARTUP_TEST=y # CONFIG_EVENT_TRACE_STARTUP_TEST is not set -CONFIG_DEBUG_USER_ASCE=y CONFIG_NOTIFIER_ERROR_INJECTION=m CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m CONFIG_FAULT_INJECTION=y @@ -856,3 +861,4 @@ CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BITOPS=m CONFIG_TEST_BPF=m +CONFIG_DEBUG_ENTRY=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 51135893cffe..bac721a501da 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -38,6 +38,7 @@ CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_LIVEPATCH=y +CONFIG_MARCH_ZEC12=y CONFIG_TUNE_ZEC12=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y @@ -55,7 +56,6 @@ CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_KVM=m CONFIG_S390_UNWIND_SELFTEST=m -CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y # CONFIG_GCC_PLUGINS is not set @@ -66,7 +66,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_MODULE_SIG_SHA256=y -CONFIG_UNUSED_SYMBOLS=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y @@ -168,13 +167,17 @@ CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=y CONFIG_NFT_CT=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m +CONFIG_NFT_OBJREF=m +CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m +CONFIG_NFT_FIB_INET=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_AUDIT=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -266,6 +269,7 @@ CONFIG_IP_VS_NQ=m CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m CONFIG_NF_TABLES_IPV4=y +CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m @@ -286,6 +290,7 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_TABLES_IPV6=y +CONFIG_NFT_FIB_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -618,7 +623,6 @@ CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y -CONFIG_TMPFS_INODE64=y CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m @@ -780,7 +784,6 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y -CONFIG_DEBUG_USER_ASCE=y CONFIG_LKDTM=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 1ef211dae77a..acf982a2ae4c 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -3,11 +3,13 @@ CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y # CONFIG_CPU_ISOLATION is not set # CONFIG_UTS_NS is not set +# CONFIG_TIME_NS is not set # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_COMPAT_BRK is not set +CONFIG_MARCH_ZEC12=y CONFIG_TUNE_ZEC12=y # CONFIG_COMPAT is not set CONFIG_NR_CPUS=2 diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 73044634d342..54c7536f2482 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -21,6 +21,7 @@ #include <crypto/algapi.h> #include <crypto/ghash.h> #include <crypto/internal/aead.h> +#include <crypto/internal/cipher.h> #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include <linux/err.h> @@ -1055,3 +1056,4 @@ MODULE_ALIAS_CRYPTO("aes-all"); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index f3caeb17c85b..a279b7d23a5e 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -22,6 +22,7 @@ #include <linux/init.h> #include <linux/mutex.h> #include <linux/spinlock.h> +#include <linux/delay.h> #include <crypto/internal/skcipher.h> #include <crypto/xts.h> #include <asm/cpacf.h> @@ -128,6 +129,9 @@ static inline int __paes_keyblob2pkey(struct key_blob *kb, /* try three times in case of failure */ for (i = 0; i < 3; i++) { + if (i > 0 && ret == -EAGAIN && in_task()) + if (msleep_interruptible(1000)) + return -EINTR; ret = pkey_keyblob2pkey(kb->key, kb->keylen, pk); if (ret == 0) break; @@ -138,10 +142,12 @@ static inline int __paes_keyblob2pkey(struct key_blob *kb, static inline int __paes_convert_key(struct s390_paes_ctx *ctx) { + int ret; struct pkey_protkey pkey; - if (__paes_keyblob2pkey(&ctx->kb, &pkey)) - return -EINVAL; + ret = __paes_keyblob2pkey(&ctx->kb, &pkey); + if (ret) + return ret; spin_lock_bh(&ctx->pk_lock); memcpy(&ctx->pk, &pkey, sizeof(pkey)); @@ -169,10 +175,12 @@ static void ecb_paes_exit(struct crypto_skcipher *tfm) static inline int __ecb_paes_set_key(struct s390_paes_ctx *ctx) { + int rc; unsigned long fc; - if (__paes_convert_key(ctx)) - return -EINVAL; + rc = __paes_convert_key(ctx); + if (rc) + return rc; /* Pick the correct function code based on the protected key type */ fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KM_PAES_128 : @@ -282,10 +290,12 @@ static void cbc_paes_exit(struct crypto_skcipher *tfm) static inline int __cbc_paes_set_key(struct s390_paes_ctx *ctx) { + int rc; unsigned long fc; - if (__paes_convert_key(ctx)) - return -EINVAL; + rc = __paes_convert_key(ctx); + if (rc) + return rc; /* Pick the correct function code based on the protected key type */ fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMC_PAES_128 : @@ -577,10 +587,12 @@ static void ctr_paes_exit(struct crypto_skcipher *tfm) static inline int __ctr_paes_set_key(struct s390_paes_ctx *ctx) { + int rc; unsigned long fc; - if (__paes_convert_key(ctx)) - return -EINVAL; + rc = __paes_convert_key(ctx); + if (rc) + return rc; /* Pick the correct function code based on the protected key type */ fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMCTR_PAES_128 : diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index b2f219ec379c..234d791ca59d 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -414,7 +414,7 @@ static int __init prng_sha512_instantiate(void) } /* append the seed by 16 bytes of unique nonce */ - get_tod_clock_ext(seed + seedlen); + store_tod_clock_ext((union tod_clock *)(seed + seedlen)); seedlen += 16; /* now initial seed of the prno drng */ diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c index 3235e4d82f2d..6c43d2ba2079 100644 --- a/arch/s390/hypfs/hypfs_diag0c.c +++ b/arch/s390/hypfs/hypfs_diag0c.c @@ -84,7 +84,7 @@ static int dbfs_diag0c_create(void **data, void **data_free_ptr, size_t *size) if (IS_ERR(diag0c_data)) return PTR_ERR(diag0c_data); memset(&diag0c_data->hdr, 0, sizeof(diag0c_data->hdr)); - get_tod_clock_ext(diag0c_data->hdr.tod_ext); + store_tod_clock_ext((union tod_clock *)diag0c_data->hdr.tod_ext); diag0c_data->hdr.len = count * sizeof(struct hypfs_diag0c_entry); diag0c_data->hdr.version = DBFS_D0C_HDR_VERSION; diag0c_data->hdr.count = count; diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index e1fcc03159ef..33f973ff9744 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -234,7 +234,7 @@ failed: struct dbfs_d2fc_hdr { u64 len; /* Length of d2fc buffer without header */ u16 version; /* Version of header */ - char tod_ext[STORE_CLOCK_EXT_SIZE]; /* TOD clock for d2fc */ + union tod_clock tod_ext; /* TOD clock for d2fc */ u64 count; /* Number of VM guests in d2fc buffer */ char reserved[30]; } __attribute__ ((packed)); @@ -252,7 +252,7 @@ static int dbfs_diag2fc_create(void **data, void **data_free_ptr, size_t *size) d2fc = diag2fc_store(guest_query, &count, sizeof(d2fc->hdr)); if (IS_ERR(d2fc)) return PTR_ERR(d2fc); - get_tod_clock_ext(d2fc->hdr.tod_ext); + store_tod_clock_ext(&d2fc->hdr.tod_ext); d2fc->hdr.len = count * sizeof(struct diag2fc_data); d2fc->hdr.version = DBFS_D2FC_HDR_VERSION; d2fc->hdr.count = count; diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index 1c8a38f762a3..d3880ca764ee 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -145,6 +145,22 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \ altinstr2, facility2) ::: "memory") +/* Alternative inline assembly with input. */ +#define alternative_input(oldinstr, newinstr, feature, input...) \ + asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ + : : input) + +/* Like alternative_input, but with a single output argument */ +#define alternative_io(oldinstr, altinstr, facility, output, input...) \ + asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) \ + : output : input) + +/* Use this macro if more than one output parameter is needed. */ +#define ASM_OUTPUT2(a...) a + +/* Use this macro if clobbers are needed without inputs. */ +#define ASM_NO_INPUT_CLOBBER(clobber...) : clobber + #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_ALTERNATIVE_H */ diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index aea32dda3d14..837d1699b109 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -368,7 +368,7 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid, #if IS_ENABLED(CONFIG_ZCRYPT) void ap_bus_cfg_chg(void); #else -static inline void ap_bus_cfg_chg(void){}; +static inline void ap_bus_cfg_chg(void){} #endif #endif /* _ASM_S390_AP_H_ */ diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 11c5952e1afa..5860ae790f2d 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -44,16 +44,6 @@ static inline int atomic_fetch_add(int i, atomic_t *v) static inline void atomic_add(int i, atomic_t *v) { -#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - /* - * Order of conditions is important to circumvent gcc 10 bug: - * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html - */ - if ((i > -129) && (i < 128) && __builtin_constant_p(i)) { - __atomic_add_const(i, &v->counter); - return; - } -#endif __atomic_add(i, &v->counter); } @@ -115,16 +105,6 @@ static inline s64 atomic64_fetch_add(s64 i, atomic64_t *v) static inline void atomic64_add(s64 i, atomic64_t *v) { -#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - /* - * Order of conditions is important to circumvent gcc 10 bug: - * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html - */ - if ((i > -129) && (i < 128) && __builtin_constant_p(i)) { - __atomic64_add_const(i, (long *)&v->counter); - return; - } -#endif __atomic64_add(i, (long *)&v->counter); } diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 431e208a5ea4..31121d32f81d 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -61,18 +61,6 @@ static __always_inline void arch_set_bit(unsigned long nr, volatile unsigned lon unsigned long *addr = __bitops_word(nr, ptr); unsigned long mask; -#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES - if (__builtin_constant_p(nr)) { - unsigned char *caddr = __bitops_byte(nr, ptr); - - asm volatile( - "oi %0,%b1\n" - : "+Q" (*caddr) - : "i" (1 << (nr & 7)) - : "cc", "memory"); - return; - } -#endif mask = 1UL << (nr & (BITS_PER_LONG - 1)); __atomic64_or(mask, (long *)addr); } @@ -82,18 +70,6 @@ static __always_inline void arch_clear_bit(unsigned long nr, volatile unsigned l unsigned long *addr = __bitops_word(nr, ptr); unsigned long mask; -#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES - if (__builtin_constant_p(nr)) { - unsigned char *caddr = __bitops_byte(nr, ptr); - - asm volatile( - "ni %0,%b1\n" - : "+Q" (*caddr) - : "i" (~(1 << (nr & 7))) - : "cc", "memory"); - return; - } -#endif mask = ~(1UL << (nr & (BITS_PER_LONG - 1))); __atomic64_and(mask, (long *)addr); } @@ -104,18 +80,6 @@ static __always_inline void arch_change_bit(unsigned long nr, unsigned long *addr = __bitops_word(nr, ptr); unsigned long mask; -#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES - if (__builtin_constant_p(nr)) { - unsigned char *caddr = __bitops_byte(nr, ptr); - - asm volatile( - "xi %0,%b1\n" - : "+Q" (*caddr) - : "i" (1 << (nr & 7)) - : "cc", "memory"); - return; - } -#endif mask = 1UL << (nr & (BITS_PER_LONG - 1)); __atomic64_xor(mask, (long *)addr); } diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index cb729d111e20..1d389847b588 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -35,4 +35,6 @@ u64 arch_cpu_idle_time(int cpu); #define arch_idle_time(cpu) arch_cpu_idle_time(cpu) +void account_idle_time_irq(void); + #endif /* _S390_CPUTIME_H */ diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 5775fc22f410..66d51ad090ab 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -233,8 +233,7 @@ extern char elf_platform[]; do { \ set_personality(PER_LINUX | \ (current->personality & (~PER_MASK))); \ - current->thread.sys_call_table = \ - (unsigned long) &sys_call_table; \ + current->thread.sys_call_table = sys_call_table; \ } while (0) #else /* CONFIG_COMPAT */ #define SET_PERSONALITY(ex) \ @@ -245,11 +244,11 @@ do { \ if ((ex).e_ident[EI_CLASS] == ELFCLASS32) { \ set_thread_flag(TIF_31BIT); \ current->thread.sys_call_table = \ - (unsigned long) &sys_call_table_emu; \ + sys_call_table_emu; \ } else { \ clear_thread_flag(TIF_31BIT); \ current->thread.sys_call_table = \ - (unsigned long) &sys_call_table; \ + sys_call_table; \ } \ } while (0) #endif /* CONFIG_COMPAT */ diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h new file mode 100644 index 000000000000..75cebc80474e --- /dev/null +++ b/arch/s390/include/asm/entry-common.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_S390_ENTRY_COMMON_H +#define ARCH_S390_ENTRY_COMMON_H + +#include <linux/sched.h> +#include <linux/audit.h> +#include <linux/tracehook.h> +#include <linux/processor.h> +#include <linux/uaccess.h> +#include <asm/fpu/api.h> + +#define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_GUARDED_STORAGE | _TIF_PER_TRAP) + +void do_per_trap(struct pt_regs *regs); +void do_syscall(struct pt_regs *regs); + +typedef void (*pgm_check_func)(struct pt_regs *regs); + +extern pgm_check_func pgm_check_table[128]; + +#ifdef CONFIG_DEBUG_ENTRY +static __always_inline void arch_check_user_regs(struct pt_regs *regs) +{ + debug_user_asce(0); +} + +#define arch_check_user_regs arch_check_user_regs +#endif /* CONFIG_DEBUG_ENTRY */ + +static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs, + unsigned long ti_work) +{ + if (ti_work & _TIF_PER_TRAP) { + clear_thread_flag(TIF_PER_TRAP); + do_per_trap(regs); + } + + if (ti_work & _TIF_GUARDED_STORAGE) + gs_load_bc_cb(regs); +} + +#define arch_exit_to_user_mode_work arch_exit_to_user_mode_work + +static __always_inline void arch_exit_to_user_mode(void) +{ + if (test_cpu_flag(CIF_FPU)) + __load_fpu_regs(); + + if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) + debug_user_asce(1); +} + +#define arch_exit_to_user_mode arch_exit_to_user_mode + +static inline bool on_thread_stack(void) +{ + return !(((unsigned long)(current->stack) ^ current_stack_pointer()) & ~(THREAD_SIZE - 1)); +} + +#endif diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index 68c476b20b57..91b5d714d28f 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -44,7 +44,7 @@ static inline int __test_facility(unsigned long nr, void *facilities) } /* - * The test_facility function uses the bit odering where the MSB is bit 0. + * The test_facility function uses the bit ordering where the MSB is bit 0. * That makes it easier to query facility bits with the bit number as * documented in the Principles of Operation. */ diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h index 34a7ae68485c..a959b815a58b 100644 --- a/arch/s390/include/asm/fpu/api.h +++ b/arch/s390/include/asm/fpu/api.h @@ -47,6 +47,8 @@ #include <linux/preempt.h> void save_fpu_regs(void); +void load_fpu_regs(void); +void __load_fpu_regs(void); static inline int test_fp_ctl(u32 fpc) { diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h index dfbc3c6c0674..58668ffb5488 100644 --- a/arch/s390/include/asm/hardirq.h +++ b/arch/s390/include/asm/hardirq.h @@ -18,7 +18,6 @@ #define or_softirq_pending(x) (S390_lowcore.softirq_pending |= (x)) #define __ARCH_IRQ_STAT -#define __ARCH_HAS_DO_SOFTIRQ #define __ARCH_IRQ_EXIT_IRQS_DISABLED static inline void ack_bad_irq(unsigned int irq) diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h index 6d4226dcf42a..b04f6a794cdf 100644 --- a/arch/s390/include/asm/idle.h +++ b/arch/s390/include/asm/idle.h @@ -20,11 +20,13 @@ struct s390_idle_data { unsigned long long clock_idle_exit; unsigned long long timer_idle_enter; unsigned long long timer_idle_exit; + unsigned long mt_cycles_enter[8]; }; extern struct device_attribute dev_attr_idle_count; extern struct device_attribute dev_attr_idle_time_us; -void psw_idle(struct s390_idle_data *, unsigned long); +void psw_idle(struct s390_idle_data *data, unsigned long psw_mask); +void psw_idle_exit(void); #endif /* _S390_IDLE_H */ diff --git a/arch/s390/include/asm/irq_work.h b/arch/s390/include/asm/irq_work.h new file mode 100644 index 000000000000..603783766d0a --- /dev/null +++ b/arch/s390/include/asm/irq_work.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_IRQ_WORK_H +#define _ASM_S390_IRQ_WORK_H + +static inline bool arch_irq_work_has_interrupt(void) +{ + return true; +} + +void arch_irq_work_raise(void); + +#endif /* _ASM_S390_IRQ_WORK_H */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 74f9a036bab2..6bcfc5614bbc 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -28,7 +28,6 @@ #define KVM_S390_BSCA_CPU_SLOTS 64 #define KVM_S390_ESCA_CPU_SLOTS 248 #define KVM_MAX_VCPUS 255 -#define KVM_USER_MEM_SLOTS 32 /* * These seem to be used for allocating ->chip in the routing table, which we diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 69ce9191eaf1..22bceeeba4bc 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -81,8 +81,8 @@ struct lowcore { psw_t return_mcck_psw; /* 0x02a0 */ /* CPU accounting and timing values. */ - __u64 sync_enter_timer; /* 0x02b0 */ - __u64 async_enter_timer; /* 0x02b8 */ + __u64 sys_enter_timer; /* 0x02b0 */ + __u8 pad_0x02b8[0x02c0-0x02b8]; /* 0x02b8 */ __u64 mcck_enter_timer; /* 0x02c0 */ __u64 exit_timer; /* 0x02c8 */ __u64 user_timer; /* 0x02d0 */ @@ -107,16 +107,15 @@ struct lowcore { __u64 async_stack; /* 0x0350 */ __u64 nodat_stack; /* 0x0358 */ __u64 restart_stack; /* 0x0360 */ - + __u64 mcck_stack; /* 0x0368 */ /* Restart function and parameter. */ - __u64 restart_fn; /* 0x0368 */ - __u64 restart_data; /* 0x0370 */ - __u64 restart_source; /* 0x0378 */ + __u64 restart_fn; /* 0x0370 */ + __u64 restart_data; /* 0x0378 */ + __u64 restart_source; /* 0x0380 */ /* Address space pointer. */ - __u64 kernel_asce; /* 0x0380 */ - __u64 user_asce; /* 0x0388 */ - __u8 pad_0x0390[0x0398-0x0390]; /* 0x0390 */ + __u64 kernel_asce; /* 0x0388 */ + __u64 user_asce; /* 0x0390 */ /* * The lpp and current_pid fields form a diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index 5afee80cff58..20e51c9ff240 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -99,6 +99,7 @@ int nmi_alloc_per_cpu(struct lowcore *lc); void nmi_free_per_cpu(struct lowcore *lc); void s390_handle_mcck(void); +void __s390_handle_mcck(void); int s390_do_machine_check(struct pt_regs *regs); #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 212628932ddc..053fe8b8dec7 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -201,7 +201,7 @@ extern unsigned int s390_pci_no_rid; Prototypes ----------------------------------------------------------------------------- */ /* Base stuff */ -int zpci_create_device(struct zpci_dev *); +int zpci_create_device(u32 fid, u32 fh, enum zpci_state state); void zpci_remove_device(struct zpci_dev *zdev); int zpci_enable_device(struct zpci_dev *); int zpci_disable_device(struct zpci_dev *); @@ -212,7 +212,7 @@ void zpci_remove_reserved_devices(void); /* CLP */ int clp_setup_writeback_mio(void); int clp_scan_pci_devices(void); -int clp_add_pci_device(u32, u32, int); +int clp_query_pci_fn(struct zpci_dev *zdev); int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); int clp_get_state(u32 fid, enum zpci_state *state); diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index d1297d6bbdcf..6b187cd72251 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -135,7 +135,7 @@ static inline void pmd_populate(struct mm_struct *mm, #define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte) #define pmd_pgtable(pmd) \ - (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE) + ((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)) /* * page table entry allocation/free routines. diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 794746a32806..29c7ecd5ad1d 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1219,8 +1219,8 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) -#define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN) -#define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) +#define p4d_deref(pud) ((unsigned long)__va(p4d_val(pud) & _REGION_ENTRY_ORIGIN)) +#define pgd_deref(pgd) ((unsigned long)__va(pgd_val(pgd) & _REGION_ENTRY_ORIGIN)) static inline unsigned long pmd_deref(pmd_t pmd) { @@ -1229,12 +1229,12 @@ static inline unsigned long pmd_deref(pmd_t pmd) origin_mask = _SEGMENT_ENTRY_ORIGIN; if (pmd_large(pmd)) origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE; - return pmd_val(pmd) & origin_mask; + return (unsigned long)__va(pmd_val(pmd) & origin_mask); } static inline unsigned long pmd_pfn(pmd_t pmd) { - return pmd_deref(pmd) >> PAGE_SHIFT; + return __pa(pmd_deref(pmd)) >> PAGE_SHIFT; } static inline unsigned long pud_deref(pud_t pud) @@ -1244,12 +1244,12 @@ static inline unsigned long pud_deref(pud_t pud) origin_mask = _REGION_ENTRY_ORIGIN; if (pud_large(pud)) origin_mask = _REGION3_ENTRY_ORIGIN_LARGE; - return pud_val(pud) & origin_mask; + return (unsigned long)__va(pud_val(pud) & origin_mask); } static inline unsigned long pud_pfn(pud_t pud) { - return pud_deref(pud) >> PAGE_SHIFT; + return __pa(pud_deref(pud)) >> PAGE_SHIFT; } /* @@ -1329,7 +1329,7 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end) } #define gup_fast_permitted gup_fast_permitted -#define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot)) +#define pfn_pte(pfn, pgprot) mk_pte_phys(((pfn) << PAGE_SHIFT), (pgprot)) #define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) #define pte_page(x) pfn_to_page(pte_pfn(x)) @@ -1636,7 +1636,7 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, } #define pmdp_collapse_flush pmdp_collapse_flush -#define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot)) +#define pfn_pmd(pfn, pgprot) mk_pmd_phys(((pfn) << PAGE_SHIFT), (pgprot)) #define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) static inline int pmd_trans_huge(pmd_t pmd) diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index 6ede29907fbf..b49e0492842c 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -131,9 +131,9 @@ static inline bool should_resched(int preempt_offset) #endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ #ifdef CONFIG_PREEMPTION -extern asmlinkage void preempt_schedule(void); +extern void preempt_schedule(void); #define __preempt_schedule() preempt_schedule() -extern asmlinkage void preempt_schedule_notrace(void); +extern void preempt_schedule_notrace(void); #define __preempt_schedule_notrace() preempt_schedule_notrace() #endif /* CONFIG_PREEMPTION */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 2058a435add4..023a15dc25a3 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -38,6 +38,9 @@ #include <asm/runtime_instr.h> #include <asm/fpu/types.h> #include <asm/fpu/internal.h> +#include <asm/irqflags.h> + +typedef long (*sys_call_ptr_t)(struct pt_regs *regs); static inline void set_cpu_flag(int flag) { @@ -101,31 +104,32 @@ extern void __bpon(void); */ struct thread_struct { unsigned int acrs[NUM_ACRS]; - unsigned long ksp; /* kernel stack pointer */ - unsigned long user_timer; /* task cputime in user space */ - unsigned long guest_timer; /* task cputime in kvm guest */ - unsigned long system_timer; /* task cputime in kernel space */ - unsigned long hardirq_timer; /* task cputime in hardirq context */ - unsigned long softirq_timer; /* task cputime in softirq context */ - unsigned long sys_call_table; /* system call table address */ - unsigned long gmap_addr; /* address of last gmap fault. */ - unsigned int gmap_write_flag; /* gmap fault write indication */ - unsigned int gmap_int_code; /* int code of last gmap fault */ - unsigned int gmap_pfault; /* signal of a pending guest pfault */ + unsigned long ksp; /* kernel stack pointer */ + unsigned long user_timer; /* task cputime in user space */ + unsigned long guest_timer; /* task cputime in kvm guest */ + unsigned long system_timer; /* task cputime in kernel space */ + unsigned long hardirq_timer; /* task cputime in hardirq context */ + unsigned long softirq_timer; /* task cputime in softirq context */ + const sys_call_ptr_t *sys_call_table; /* system call table address */ + unsigned long gmap_addr; /* address of last gmap fault. */ + unsigned int gmap_write_flag; /* gmap fault write indication */ + unsigned int gmap_int_code; /* int code of last gmap fault */ + unsigned int gmap_pfault; /* signal of a pending guest pfault */ + /* Per-thread information related to debugging */ - struct per_regs per_user; /* User specified PER registers */ - struct per_event per_event; /* Cause of the last PER trap */ - unsigned long per_flags; /* Flags to control debug behavior */ - unsigned int system_call; /* system call number in signal */ - unsigned long last_break; /* last breaking-event-address. */ - /* pfault_wait is used to block the process on a pfault event */ + struct per_regs per_user; /* User specified PER registers */ + struct per_event per_event; /* Cause of the last PER trap */ + unsigned long per_flags; /* Flags to control debug behavior */ + unsigned int system_call; /* system call number in signal */ + unsigned long last_break; /* last breaking-event-address. */ + /* pfault_wait is used to block the process on a pfault event */ unsigned long pfault_wait; struct list_head list; /* cpu runtime instrumentation */ struct runtime_instr_cb *ri_cb; - struct gs_cb *gs_cb; /* Current guarded storage cb */ - struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */ - unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ + struct gs_cb *gs_cb; /* Current guarded storage cb */ + struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */ + unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ /* * Warning: 'fpu' is dynamically-sized. It *MUST* be at * the end. @@ -184,6 +188,7 @@ static inline void release_thread(struct task_struct *tsk) { } /* Free guarded storage control block */ void guarded_storage_release(struct task_struct *tsk); +void gs_load_bc_cb(struct pt_regs *regs); unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(tsk) ((struct pt_regs *) \ @@ -324,6 +329,11 @@ extern void memcpy_absolute(void *, void *, size_t); extern int s390_isolate_bp(void); extern int s390_isolate_bp_guest(void); +static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) +{ + return arch_irqs_disabled_flags(regs->psw.mask); +} + #endif /* __ASSEMBLY__ */ #endif /* __ASM_S390_PROCESSOR_H */ diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 73ca7f7cac33..f828be78937f 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -11,13 +11,13 @@ #include <uapi/asm/ptrace.h> #define PIF_SYSCALL 0 /* inside a system call */ -#define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */ -#define PIF_SYSCALL_RESTART 2 /* restart the current system call */ +#define PIF_SYSCALL_RESTART 1 /* restart the current system call */ +#define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */ #define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */ #define _PIF_SYSCALL BIT(PIF_SYSCALL) -#define _PIF_PER_TRAP BIT(PIF_PER_TRAP) #define _PIF_SYSCALL_RESTART BIT(PIF_SYSCALL_RESTART) +#define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET) #define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT) #ifndef __ASSEMBLY__ @@ -68,6 +68,9 @@ enum { &(*(struct psw_bits *)(&(__psw))); \ })) +#define PGM_INT_CODE_MASK 0x7f +#define PGM_INT_CODE_PER 0x80 + /* * The pt_regs struct defines the way the registers are stored on * the stack during a system call. diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 19e84c95d1e7..d9215c7106f0 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -250,17 +250,13 @@ struct slsb { * struct qdio_outbuf_state - SBAL related asynchronous operation information * (for communication with upper layer programs) * (only required for use with completion queues) - * @flags: flags indicating state of buffer * @user: pointer to upper layer program's state information related to SBAL * (stored in user1 data of QAOB) */ struct qdio_outbuf_state { - u8 flags; void *user; }; -#define QDIO_OUTBUF_STATE_FLAG_PENDING 0x01 - #define CHSC_AC1_INITIATE_INPUTQ 0x80 @@ -315,6 +311,7 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, #define QDIO_ERROR_GET_BUF_STATE 0x0002 #define QDIO_ERROR_SET_BUF_STATE 0x0004 #define QDIO_ERROR_SLSB_STATE 0x0100 +#define QDIO_ERROR_SLSB_PENDING 0x0200 #define QDIO_ERROR_FATAL 0x00ff #define QDIO_ERROR_TEMPORARY 0xff00 @@ -336,7 +333,7 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, * @no_output_qs: number of output queues * @input_handler: handler to be called for input queues * @output_handler: handler to be called for output queues - * @irq_poll: Data IRQ polling handler (NULL when not supported) + * @irq_poll: Data IRQ polling handler * @scan_threshold: # of in-use buffers that triggers scan on output queue * @int_parm: interruption parameter * @input_sbal_addr_array: per-queue array, each element points to 128 SBALs diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h index c00f7b031628..a7c3ccf681da 100644 --- a/arch/s390/include/asm/scsw.h +++ b/arch/s390/include/asm/scsw.h @@ -525,8 +525,7 @@ static inline int scsw_cmd_is_valid_pno(union scsw *scsw) return (scsw->cmd.fctl != 0) && (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && (!(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) || - ((scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) && - (scsw->cmd.actl & SCSW_ACTL_SUSPENDED))); + (scsw->cmd.actl & SCSW_ACTL_SUSPENDED)); } /** diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index d9d5de0f67ff..9107e3dab68c 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -14,8 +14,8 @@ #include <linux/err.h> #include <asm/ptrace.h> -extern const unsigned long sys_call_table[]; -extern const unsigned long sys_call_table_emu[]; +extern const sys_call_ptr_t sys_call_table[]; +extern const sys_call_ptr_t sys_call_table_emu[]; static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) @@ -56,6 +56,7 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { + set_pt_regs_flag(regs, PIF_SYSCALL_RET_SET); regs->gprs[2] = error ? error : val; } @@ -97,4 +98,10 @@ static inline int syscall_get_arch(struct task_struct *task) #endif return AUDIT_ARCH_S390X; } + +static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs) +{ + return false; +} + #endif /* _ASM_SYSCALL_H */ diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h index 1320f4213d80..ad2c996e7e93 100644 --- a/arch/s390/include/asm/syscall_wrapper.h +++ b/arch/s390/include/asm/syscall_wrapper.h @@ -7,6 +7,33 @@ #ifndef _ASM_S390_SYSCALL_WRAPPER_H #define _ASM_S390_SYSCALL_WRAPPER_H +#define __SC_TYPE(t, a) t + +#define SYSCALL_PT_ARG6(regs, m, t1, t2, t3, t4, t5, t6)\ + SYSCALL_PT_ARG5(regs, m, t1, t2, t3, t4, t5), \ + m(t6, (regs->gprs[7])) + +#define SYSCALL_PT_ARG5(regs, m, t1, t2, t3, t4, t5) \ + SYSCALL_PT_ARG4(regs, m, t1, t2, t3, t4), \ + m(t5, (regs->gprs[6])) + +#define SYSCALL_PT_ARG4(regs, m, t1, t2, t3, t4) \ + SYSCALL_PT_ARG3(regs, m, t1, t2, t3), \ + m(t4, (regs->gprs[5])) + +#define SYSCALL_PT_ARG3(regs, m, t1, t2, t3) \ + SYSCALL_PT_ARG2(regs, m, t1, t2), \ + m(t3, (regs->gprs[4])) + +#define SYSCALL_PT_ARG2(regs, m, t1, t2) \ + SYSCALL_PT_ARG1(regs, m, t1), \ + m(t2, (regs->gprs[3])) + +#define SYSCALL_PT_ARG1(regs, m, t1) \ + m(t1, (regs->orig_gpr2)) + +#define SYSCALL_PT_ARGS(x, ...) SYSCALL_PT_ARG##x(__VA_ARGS__) + #ifdef CONFIG_COMPAT #define __SC_COMPAT_TYPE(t, a) \ __typeof(__builtin_choose_expr(sizeof(t) > 4, 0L, (t)0)) a @@ -29,14 +56,15 @@ (t)__ReS; \ }) -#define __S390_SYS_STUBx(x, name, ...) \ - asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));\ - ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \ - asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))\ - { \ - long ret = __s390x_sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__));\ - __MAP(x,__SC_TEST,__VA_ARGS__); \ - return ret; \ +#define __S390_SYS_STUBx(x, name, ...) \ + long __s390_sys##name(struct pt_regs *regs); \ + ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \ + long __s390_sys##name(struct pt_regs *regs) \ + { \ + long ret = __do_sys##name(SYSCALL_PT_ARGS(x, regs, \ + __SC_COMPAT_CAST, __MAP(x, __SC_TYPE, __VA_ARGS__))); \ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + return ret; \ } /* @@ -45,17 +73,17 @@ */ #define COMPAT_SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __s390_compat_sys_##sname(void); \ + long __s390_compat_sys_##sname(void); \ ALLOW_ERROR_INJECTION(__s390_compat_sys_##sname, ERRNO); \ - asmlinkage long __s390_compat_sys_##sname(void) + long __s390_compat_sys_##sname(void) #define SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __s390x_sys_##sname(void); \ + long __s390x_sys_##sname(void); \ ALLOW_ERROR_INJECTION(__s390x_sys_##sname, ERRNO); \ - asmlinkage long __s390_sys_##sname(void) \ + long __s390_sys_##sname(void) \ __attribute__((alias(__stringify(__s390x_sys_##sname)))); \ - asmlinkage long __s390x_sys_##sname(void) + long __s390x_sys_##sname(void) #define COND_SYSCALL(name) \ cond_syscall(__s390x_sys_##name); \ @@ -65,23 +93,24 @@ SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers); \ SYSCALL_ALIAS(__s390_sys_##name, sys_ni_posix_timers) -#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ - __diag_push(); \ - __diag_ignore(GCC, 8, "-Wattribute-alias", \ - "Type aliasing is used to sanitize syscall arguments");\ - asmlinkage long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ - asmlinkage long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ - __attribute__((alias(__stringify(__se_compat_sys##name)))); \ - ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \ - static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ - asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ - asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ - { \ - long ret = __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\ - __MAP(x,__SC_TEST,__VA_ARGS__); \ - return ret; \ - } \ - __diag_pop(); \ +#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ + __diag_push(); \ + __diag_ignore(GCC, 8, "-Wattribute-alias", \ + "Type aliasing is used to sanitize syscall arguments"); \ + long __s390_compat_sys##name(struct pt_regs *regs); \ + long __s390_compat_sys##name(struct pt_regs *regs) \ + __attribute__((alias(__stringify(__se_compat_sys##name)))); \ + ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \ + static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + long __se_compat_sys##name(struct pt_regs *regs); \ + long __se_compat_sys##name(struct pt_regs *regs) \ + { \ + long ret = __do_compat_sys##name(SYSCALL_PT_ARGS(x, regs, __SC_DELOUSE, \ + __MAP(x, __SC_TYPE, __VA_ARGS__))); \ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + return ret; \ + } \ + __diag_pop(); \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) /* @@ -101,9 +130,9 @@ #define SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __s390x_sys_##sname(void); \ + long __s390x_sys_##sname(void); \ ALLOW_ERROR_INJECTION(__s390x_sys_##sname, ERRNO); \ - asmlinkage long __s390x_sys_##sname(void) + long __s390x_sys_##sname(void) #define COND_SYSCALL(name) \ cond_syscall(__s390x_sys_##name) @@ -113,23 +142,24 @@ #endif /* CONFIG_COMPAT */ -#define __SYSCALL_DEFINEx(x, name, ...) \ - __diag_push(); \ - __diag_ignore(GCC, 8, "-Wattribute-alias", \ - "Type aliasing is used to sanitize syscall arguments");\ - asmlinkage long __s390x_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ - __attribute__((alias(__stringify(__se_sys##name)))); \ - ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO); \ - long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ - static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ - __S390_SYS_STUBx(x, name, __VA_ARGS__) \ - asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ - { \ - long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ - __MAP(x,__SC_TEST,__VA_ARGS__); \ - return ret; \ - } \ - __diag_pop(); \ +#define __SYSCALL_DEFINEx(x, name, ...) \ + __diag_push(); \ + __diag_ignore(GCC, 8, "-Wattribute-alias", \ + "Type aliasing is used to sanitize syscall arguments"); \ + long __s390x_sys##name(struct pt_regs *regs) \ + __attribute__((alias(__stringify(__se_sys##name)))); \ + ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO); \ + static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + long __se_sys##name(struct pt_regs *regs); \ + __S390_SYS_STUBx(x, name, __VA_ARGS__) \ + long __se_sys##name(struct pt_regs *regs) \ + { \ + long ret = __do_sys##name(SYSCALL_PT_ARGS(x, regs, \ + __SC_CAST, __MAP(x, __SC_TYPE, __VA_ARGS__))); \ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + return ret; \ + } \ + __diag_pop(); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* _ASM_X86_SYSCALL_WRAPPER_H */ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 3c5b1f909b6d..e6674796aa6f 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -36,6 +36,7 @@ */ struct thread_info { unsigned long flags; /* low level flags */ + unsigned long syscall_work; /* SYSCALL_WORK_ flags */ }; /* @@ -46,6 +47,8 @@ struct thread_info { .flags = 0, \ } +struct task_struct; + void arch_release_task_struct(struct task_struct *tsk); int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); @@ -68,6 +71,7 @@ void arch_setup_new_exec(void); #define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ #define TIF_ISOLATE_BP 8 /* Run process with isolated BP */ #define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */ +#define TIF_PER_TRAP 10 /* Need to handle PER trap on exit to usermode */ #define TIF_31BIT 16 /* 32bit process */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ @@ -91,6 +95,7 @@ void arch_setup_new_exec(void); #define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING) #define _TIF_ISOLATE_BP BIT(TIF_ISOLATE_BP) #define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST) +#define _TIF_PER_TRAP BIT(TIF_PER_TRAP) #define _TIF_31BIT BIT(TIF_31BIT) #define _TIF_SINGLE_STEP BIT(TIF_SINGLE_STEP) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index c8e244ecdfde..c4e23e925665 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -19,6 +19,25 @@ extern u64 clock_comparator_max; +union tod_clock { + __uint128_t val; + struct { + __uint128_t ei : 8; /* epoch index */ + __uint128_t tod : 64; /* bits 0-63 of tod clock */ + __uint128_t : 40; + __uint128_t pf : 16; /* programmable field */ + }; + struct { + __uint128_t eitod : 72; /* epoch index + bits 0-63 tod clock */ + __uint128_t : 56; + }; + struct { + __uint128_t us : 60; /* micro-seconds */ + __uint128_t sus : 12; /* sub-microseconds */ + __uint128_t : 56; + }; +} __packed; + /* Inline functions for clock register access. */ static inline int set_tod_clock(__u64 time) { @@ -32,18 +51,23 @@ static inline int set_tod_clock(__u64 time) return cc; } -static inline int store_tod_clock(__u64 *time) +static inline int store_tod_clock_ext_cc(union tod_clock *clk) { int cc; asm volatile( - " stck %1\n" + " stcke %1\n" " ipm %0\n" " srl %0,28\n" - : "=d" (cc), "=Q" (*time) : : "cc"); + : "=d" (cc), "=Q" (*clk) : : "cc"); return cc; } +static inline void store_tod_clock_ext(union tod_clock *tod) +{ + asm volatile("stcke %0" : "=Q" (*tod) : : "cc"); +} + static inline void set_clock_comparator(__u64 time) { asm volatile("sckc %0" : : "Q" (time)); @@ -144,23 +168,15 @@ static inline void local_tick_enable(unsigned long long comp) } #define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ -#define STORE_CLOCK_EXT_SIZE 16 /* stcke writes 16 bytes */ typedef unsigned long long cycles_t; -static inline void get_tod_clock_ext(char *clk) -{ - typedef struct { char _[STORE_CLOCK_EXT_SIZE]; } addrtype; - - asm volatile("stcke %0" : "=Q" (*(addrtype *) clk) : : "cc"); -} - static inline unsigned long long get_tod_clock(void) { - char clk[STORE_CLOCK_EXT_SIZE]; + union tod_clock clk; - get_tod_clock_ext(clk); - return *((unsigned long long *)&clk[1]); + store_tod_clock_ext(&clk); + return clk.tod; } static inline unsigned long long get_tod_clock_fast(void) @@ -183,7 +199,7 @@ static inline cycles_t get_cycles(void) int get_phys_clock(unsigned long *clock); void init_cpu_timer(void); -extern unsigned char tod_clock_base[16] __aligned(8); +extern union tod_clock tod_clock_base; /** * get_clock_monotonic - returns current time in clock rate units @@ -197,7 +213,7 @@ static inline unsigned long long get_tod_clock_monotonic(void) unsigned long long tod; preempt_disable_notrace(); - tod = get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; + tod = get_tod_clock() - tod_clock_base.tod; preempt_enable_notrace(); return tod; } diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 954fa8ca6cbd..fe6407f0eb1b 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -66,7 +66,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, __tlb_adjust_range(tlb, address, PAGE_SIZE); tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; - tlb->cleared_ptes = 1; + tlb->cleared_pmds = 1; /* * page_table_free_rcu takes care of the allocation bit masks * of the 2K table fragments in the 4K page table page, @@ -110,7 +110,6 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, __tlb_adjust_range(tlb, address, PAGE_SIZE); tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; - tlb->cleared_p4ds = 1; tlb_remove_table(tlb, p4d); } @@ -128,7 +127,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, return; tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; - tlb->cleared_puds = 1; + tlb->cleared_p4ds = 1; tlb_remove_table(tlb, pud); } diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index c6707885e7c2..4756d2937e54 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -18,7 +18,7 @@ #include <asm/extable.h> #include <asm/facility.h> -void debug_user_asce(void); +void debug_user_asce(int exit); static inline int __range_ok(unsigned long addr, unsigned long size) { diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index f65590889054..b45e3dddd2c2 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -4,17 +4,18 @@ #include <vdso/datapage.h> -/* Default link addresses for the vDSOs */ -#define VDSO32_LBASE 0 +/* Default link address for the vDSO */ #define VDSO64_LBASE 0 +#define __VVAR_PAGES 2 + #define VDSO_VERSION_STRING LINUX_2.6.29 #ifndef __ASSEMBLY__ extern struct vdso_data *vdso_data; -void vdso_getcpu_init(void); +int vdso_getcpu_init(void); #endif /* __ASSEMBLY__ */ #endif /* __S390_VDSO_H__ */ diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h index bf123065ad3b..ed89ef742530 100644 --- a/arch/s390/include/asm/vdso/gettimeofday.h +++ b/arch/s390/include/asm/vdso/gettimeofday.h @@ -24,13 +24,12 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void) static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd) { - const struct vdso_data *vdso = __arch_get_vdso_data(); u64 adj, now; now = get_tod_clock(); - adj = vdso->arch_data.tod_steering_end - now; + adj = vd->arch_data.tod_steering_end - now; if (unlikely((s64) adj > 0)) - now += (vdso->arch_data.tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15); + now += (vd->arch_data.tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15); return now; } @@ -68,4 +67,11 @@ long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts) return r2; } +#ifdef CONFIG_TIME_NS +static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +{ + return _timens_data; +} +#endif + #endif diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h index fac6a67988eb..fe17e448c0c5 100644 --- a/arch/s390/include/asm/vtime.h +++ b/arch/s390/include/asm/vtime.h @@ -4,4 +4,18 @@ #define __ARCH_HAS_VTIME_TASK_SWITCH +static inline void update_timer_sys(void) +{ + S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer; + S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.sys_enter_timer; + S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer; +} + +static inline void update_timer_mcck(void) +{ + S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer; + S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.mcck_enter_timer; + S390_lowcore.last_update_timer = S390_lowcore.mcck_enter_timer; +} + #endif /* _S390_VTIME_H */ diff --git a/arch/s390/include/uapi/asm/perf_cpum_cf_diag.h b/arch/s390/include/uapi/asm/perf_cpum_cf_diag.h new file mode 100644 index 000000000000..3d8284b95f87 --- /dev/null +++ b/arch/s390/include/uapi/asm/perf_cpum_cf_diag.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright IBM Corp. 2021 + * Interface implementation for communication with the CPU Measurement + * counter facility device driver. + * + * Author(s): Thomas Richter <tmricht@linux.ibm.com> + * + * Define for ioctl() commands to communicate with the CPU Measurement + * counter facility device driver. + */ + +#ifndef _PERF_CPUM_CF_DIAG_H +#define _PERF_CPUM_CF_DIAG_H + +#include <linux/ioctl.h> +#include <linux/types.h> + +#define S390_HWCTR_DEVICE "hwctr" +#define S390_HWCTR_START_VERSION 1 + +struct s390_ctrset_start { /* Set CPUs to operate on */ + __u64 version; /* Version of interface */ + __u64 data_bytes; /* # of bytes required */ + __u64 cpumask_len; /* Length of CPU mask in bytes */ + __u64 *cpumask; /* Pointer to CPU mask */ + __u64 counter_sets; /* Bit mask of counter sets to get */ +}; + +struct s390_ctrset_setdata { /* Counter set data */ + __u32 set; /* Counter set number */ + __u32 no_cnts; /* # of counters stored in cv[] */ + __u64 cv[0]; /* Counter values (variable length) */ +}; + +struct s390_ctrset_cpudata { /* Counter set data per CPU */ + __u32 cpu_nr; /* CPU number */ + __u32 no_sets; /* # of counters sets in data[] */ + struct s390_ctrset_setdata data[0]; +}; + +struct s390_ctrset_read { /* Structure to get all ctr sets */ + __u64 no_cpus; /* Total # of CPUs data taken from */ + struct s390_ctrset_cpudata data[0]; +}; + +#define S390_HWCTR_MAGIC 'C' /* Random magic # for ioctls */ +#define S390_HWCTR_START _IOWR(S390_HWCTR_MAGIC, 1, struct s390_ctrset_start) +#define S390_HWCTR_STOP _IO(S390_HWCTR_MAGIC, 2) +#define S390_HWCTR_READ _IOWR(S390_HWCTR_MAGIC, 3, struct s390_ctrset_read) +#endif diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h index 543dd70e12c8..ad64d673b5e6 100644 --- a/arch/s390/include/uapi/asm/ptrace.h +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -179,8 +179,9 @@ #define ACR_SIZE 4 -#define PTRACE_OLDSETOPTIONS 21 - +#define PTRACE_OLDSETOPTIONS 21 +#define PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 #ifndef __ASSEMBLY__ #include <linux/stddef.h> #include <linux/types.h> diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index dd73b7f07423..c97818a382f3 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -34,7 +34,7 @@ CFLAGS_dumpstack.o += -fno-optimize-sibling-calls CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o -obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o +obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 79724d861dc9..15e637728a4b 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -26,26 +26,14 @@ int main(void) BLANK(); /* thread struct offsets */ OFFSET(__THREAD_ksp, thread_struct, ksp); - OFFSET(__THREAD_sysc_table, thread_struct, sys_call_table); - OFFSET(__THREAD_last_break, thread_struct, last_break); - OFFSET(__THREAD_FPU_fpc, thread_struct, fpu.fpc); - OFFSET(__THREAD_FPU_regs, thread_struct, fpu.regs); - OFFSET(__THREAD_per_cause, thread_struct, per_event.cause); - OFFSET(__THREAD_per_address, thread_struct, per_event.address); - OFFSET(__THREAD_per_paid, thread_struct, per_event.paid); - OFFSET(__THREAD_trap_tdb, thread_struct, trap_tdb); BLANK(); /* thread info offsets */ OFFSET(__TI_flags, task_struct, thread_info.flags); BLANK(); /* pt_regs offsets */ - OFFSET(__PT_ARGS, pt_regs, args); OFFSET(__PT_PSW, pt_regs, psw); OFFSET(__PT_GPRS, pt_regs, gprs); OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2); - OFFSET(__PT_INT_CODE, pt_regs, int_code); - OFFSET(__PT_INT_PARM, pt_regs, int_parm); - OFFSET(__PT_INT_PARM_LONG, pt_regs, int_parm_long); OFFSET(__PT_FLAGS, pt_regs, flags); OFFSET(__PT_CR1, pt_regs, cr1); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); @@ -64,6 +52,7 @@ int main(void) OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit); OFFSET(__TIMER_IDLE_ENTER, s390_idle_data, timer_idle_enter); OFFSET(__TIMER_IDLE_EXIT, s390_idle_data, timer_idle_exit); + OFFSET(__MT_CYCLES_ENTER, s390_idle_data, mt_cycles_enter); BLANK(); /* hardware defined lowcore locations 0x000 - 0x1ff */ OFFSET(__LC_EXT_PARAMS, lowcore, ext_params); @@ -115,13 +104,9 @@ int main(void) OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags); OFFSET(__LC_RETURN_PSW, lowcore, return_psw); OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw); - OFFSET(__LC_SYNC_ENTER_TIMER, lowcore, sync_enter_timer); - OFFSET(__LC_ASYNC_ENTER_TIMER, lowcore, async_enter_timer); + OFFSET(__LC_SYS_ENTER_TIMER, lowcore, sys_enter_timer); OFFSET(__LC_MCCK_ENTER_TIMER, lowcore, mcck_enter_timer); OFFSET(__LC_EXIT_TIMER, lowcore, exit_timer); - OFFSET(__LC_USER_TIMER, lowcore, user_timer); - OFFSET(__LC_SYSTEM_TIMER, lowcore, system_timer); - OFFSET(__LC_STEAL_TIMER, lowcore, steal_timer); OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer); OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); OFFSET(__LC_INT_CLOCK, lowcore, int_clock); @@ -133,6 +118,7 @@ int main(void) OFFSET(__LC_ASYNC_STACK, lowcore, async_stack); OFFSET(__LC_NODAT_STACK, lowcore, nodat_stack); OFFSET(__LC_RESTART_STACK, lowcore, restart_stack); + OFFSET(__LC_MCCK_STACK, lowcore, mcck_stack); OFFSET(__LC_RESTART_FN, lowcore, restart_fn); OFFSET(__LC_RESTART_DATA, lowcore, restart_data); OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source); diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 38d4bdbc34b9..1d0e17ec93eb 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -118,6 +118,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, ¤t->thread.fpu); clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ + clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); return 0; } diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 205b2e2648aa..0e36dfc9ccd6 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -365,7 +365,7 @@ static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa) memcpy(&nt_prstatus.pr_reg.gprs, sa->gprs, sizeof(sa->gprs)); memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw)); memcpy(&nt_prstatus.pr_reg.acrs, sa->acrs, sizeof(sa->acrs)); - nt_prstatus.pr_pid = cpu; + nt_prstatus.common.pr_pid = cpu; /* Prepare fpregset (floating point) note */ memset(&nt_fpregset, 0, sizeof(nt_fpregset)); memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc)); diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index b6619ae9a3e0..bb958d32bd81 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -829,11 +829,11 @@ static inline debug_entry_t *get_active_entry(debug_info_t *id) static inline void debug_finish_entry(debug_info_t *id, debug_entry_t *active, int level, int exception) { - unsigned char clk[STORE_CLOCK_EXT_SIZE]; unsigned long timestamp; + union tod_clock clk; - get_tod_clock_ext(clk); - timestamp = *(unsigned long *) &clk[0] >> 4; + store_tod_clock_ext(&clk); + timestamp = clk.us; timestamp -= TOD_UNIX_EPOCH >> 12; active->clock = timestamp; active->cpu = smp_processor_id(); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index cc89763a4d3c..a361d2e70025 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -35,16 +35,16 @@ static void __init reset_tod_clock(void) { - u64 time; + union tod_clock clk; - if (store_tod_clock(&time) == 0) + if (store_tod_clock_ext_cc(&clk) == 0) return; /* TOD clock not running. Set the clock to Unix Epoch. */ - if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0) + if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk)) disabled_wait(); - memset(tod_clock_base, 0, 16); - *(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH; + memset(&tod_clock_base, 0, sizeof(tod_clock_base)); + tod_clock_base.tod = TOD_UNIX_EPOCH; S390_lowcore.last_update_clock = TOD_UNIX_EPOCH; } @@ -230,7 +230,7 @@ static __init void detect_machine_facilities(void) } if (test_facility(133)) S390_lowcore.machine_flags |= MACHINE_FLAG_GS; - if (test_facility(139) && (tod_clock_base[1] & 0x80)) { + if (test_facility(139) && (tod_clock_base.tod >> 63)) { /* Enabled signed clock comparator comparisons */ S390_lowcore.machine_flags |= MACHINE_FLAG_SCC; clock_comparator_max = -1ULL >> 1; diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index f1ba197b10c0..c10b9f31eef7 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -51,38 +51,8 @@ STACK_SHIFT = PAGE_SHIFT + THREAD_SIZE_ORDER STACK_SIZE = 1 << STACK_SHIFT STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE -_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING | \ - _TIF_NOTIFY_SIGNAL) -_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ - _TIF_SYSCALL_TRACEPOINT) -_CIF_WORK = (_CIF_FPU) -_PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) - _LPP_OFFSET = __LC_LPP - .macro TRACE_IRQS_ON -#ifdef CONFIG_TRACE_IRQFLAGS - basr %r2,%r0 - brasl %r14,trace_hardirqs_on_caller -#endif - .endm - - .macro TRACE_IRQS_OFF -#ifdef CONFIG_TRACE_IRQFLAGS - basr %r2,%r0 - brasl %r14,trace_hardirqs_off_caller -#endif - .endm - - .macro LOCKDEP_SYS_EXIT -#ifdef CONFIG_LOCKDEP - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jz .+10 - brasl %r14,lockdep_sys_exit -#endif - .endm - .macro CHECK_STACK savearea #ifdef CONFIG_CHECK_STACK tml %r15,STACK_SIZE - CONFIG_STACK_GUARD @@ -91,12 +61,6 @@ _LPP_OFFSET = __LC_LPP #endif .endm - .macro DEBUG_USER_ASCE -#ifdef CONFIG_DEBUG_USER_ASCE - brasl %r14,debug_user_asce -#endif - .endm - .macro CHECK_VMAP_STACK savearea,oklabel #ifdef CONFIG_VMAP_STACK lgr %r14,%r15 @@ -106,6 +70,8 @@ _LPP_OFFSET = __LC_LPP je \oklabel clg %r14,__LC_ASYNC_STACK je \oklabel + clg %r14,__LC_MCCK_STACK + je \oklabel clg %r14,__LC_NODAT_STACK je \oklabel clg %r14,__LC_RESTART_STACK @@ -117,113 +83,9 @@ _LPP_OFFSET = __LC_LPP #endif .endm - .macro SWITCH_ASYNC savearea,timer,clock - tmhh %r8,0x0001 # interrupting from user ? - jnz 4f -#if IS_ENABLED(CONFIG_KVM) - lgr %r14,%r9 - larl %r13,.Lsie_gmap - slgr %r14,%r13 - lghi %r13,.Lsie_done - .Lsie_gmap - clgr %r14,%r13 - jhe 0f - lghi %r11,\savearea # inside critical section, do cleanup - brasl %r14,.Lcleanup_sie -#endif -0: larl %r13,.Lpsw_idle_exit - cgr %r13,%r9 - jne 3f - - larl %r1,smp_cpu_mtid - llgf %r1,0(%r1) - ltgr %r1,%r1 - jz 2f # no SMT, skip mt_cycles calculation - .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+80(%r15) - larl %r3,mt_cycles - ag %r3,__LC_PERCPU_OFFSET - la %r4,__SF_EMPTY+16(%r15) -1: lg %r0,0(%r3) - slg %r0,0(%r4) - alg %r0,64(%r4) - stg %r0,0(%r3) - la %r3,8(%r3) - la %r4,8(%r4) - brct %r1,1b - -2: mvc __CLOCK_IDLE_EXIT(8,%r2), \clock - mvc __TIMER_IDLE_EXIT(8,%r2), \timer - # account system time going idle - ni __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT - - lg %r13,__LC_STEAL_TIMER - alg %r13,__CLOCK_IDLE_ENTER(%r2) - slg %r13,__LC_LAST_UPDATE_CLOCK - stg %r13,__LC_STEAL_TIMER - - mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2) - - lg %r13,__LC_SYSTEM_TIMER - alg %r13,__LC_LAST_UPDATE_TIMER - slg %r13,__TIMER_IDLE_ENTER(%r2) - stg %r13,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) - - nihh %r8,0xfcfd # clear wait state and irq bits -3: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? - slgr %r14,%r15 - srag %r14,%r14,STACK_SHIFT - jnz 5f - CHECK_STACK \savearea - aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 6f -4: UPDATE_VTIME %r14,%r15,\timer - BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP -5: lg %r15,__LC_ASYNC_STACK # load async stack -6: la %r11,STACK_FRAME_OVERHEAD(%r15) - .endm - - .macro UPDATE_VTIME w1,w2,enter_timer - lg \w1,__LC_EXIT_TIMER - lg \w2,__LC_LAST_UPDATE_TIMER - slg \w1,\enter_timer - slg \w2,__LC_EXIT_TIMER - alg \w1,__LC_USER_TIMER - alg \w2,__LC_SYSTEM_TIMER - stg \w1,__LC_USER_TIMER - stg \w2,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer - .endm - - .macro RESTORE_SM_CLEAR_PER - stg %r8,__LC_RETURN_PSW - ni __LC_RETURN_PSW,0xbf - ssm __LC_RETURN_PSW - .endm - - .macro ENABLE_INTS - stosm __SF_EMPTY(%r15),3 - .endm - - .macro ENABLE_INTS_TRACE - TRACE_IRQS_ON - ENABLE_INTS - .endm - - .macro DISABLE_INTS - stnsm __SF_EMPTY(%r15),0xfc - .endm - - .macro DISABLE_INTS_TRACE - DISABLE_INTS - TRACE_IRQS_OFF - .endm - .macro STCK savearea -#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES - .insn s,0xb27c0000,\savearea # store clock fast -#else - .insn s,0xb2050000,\savearea # store clock -#endif + ALTERNATIVE ".insn s,0xb2050000,\savearea", \ + ".insn s,0xb27c0000,\savearea", 25 .endm /* @@ -267,18 +129,17 @@ _LPP_OFFSET = __LC_LPP "jnz .+8; .long 0xb2e8d000", 82 .endm - GEN_BR_THUNK %r9 GEN_BR_THUNK %r14 - GEN_BR_THUNK %r14,%r11 + GEN_BR_THUNK %r14,%r13 .section .kprobes.text, "ax" .Ldummy: /* - * This nop exists only in order to avoid that __switch_to starts at + * This nop exists only in order to avoid that __bpon starts at * the beginning of the kprobes text section. In that case we would * have several symbols at the same address. E.g. objdump would take * an arbitrary symbol name when disassembling this code. - * With the added nop in between the __switch_to symbol is unique + * With the added nop in between the __bpon symbol is unique * again. */ nop 0 @@ -327,10 +188,6 @@ ENTRY(sie64a) stg %r3,__SF_SIE_SAVEAREA(%r15) # save guest register save area xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0 mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags - TSTMSK __LC_CPU_FLAGS,_CIF_FPU # load guest fp/vx registers ? - jno .Lsie_load_guest_gprs - brasl %r14,load_fpu_regs # load guest fp/vx regs -.Lsie_load_guest_gprs: lmg %r0,%r13,0(%r3) # load guest gprs 0-13 lg %r14,__LC_GMAP # get gmap pointer ltgr %r14,%r14 @@ -357,7 +214,7 @@ ENTRY(sie64a) # are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable. # Other instructions between sie64a and .Lsie_done should not cause program # interrupts. So lets use 3 nops as a landing pad for all possible rewinds. -# See also .Lcleanup_sie +# See also .Lcleanup_sie_mcck/.Lcleanup_sie_int .Lrewind_pad6: nopr 7 .Lrewind_pad4: @@ -370,7 +227,6 @@ sie_exit: stmg %r0,%r13,0(%r14) # save guest gprs 0-13 xgr %r0,%r0 # clear guest registers to xgr %r1,%r1 # prevent speculative use - xgr %r2,%r2 xgr %r3,%r3 xgr %r4,%r4 xgr %r5,%r5 @@ -397,249 +253,68 @@ EXPORT_SYMBOL(sie_exit) */ ENTRY(system_call) - stpt __LC_SYNC_ENTER_TIMER + stpt __LC_SYS_ENTER_TIMER stmg %r8,%r15,__LC_SAVE_AREA_SYNC BPOFF - lg %r12,__LC_CURRENT - lghi %r14,_PIF_SYSCALL + lghi %r14,0 .Lsysc_per: lctlg %c1,%c1,__LC_KERNEL_ASCE - lghi %r13,__TASK_thread + lg %r12,__LC_CURRENT lg %r15,__LC_KERNEL_STACK - la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs - UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER - BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP - stmg %r0,%r7,__PT_R0(%r11) - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC - mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW - mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC - stg %r14,__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - ENABLE_INTS -.Lsysc_do_svc: + stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP # clear user controlled register to prevent speculative use xgr %r0,%r0 - # load address of system call table - lg %r10,__THREAD_sysc_table(%r13,%r12) - llgh %r8,__PT_INT_CODE+2(%r11) - slag %r8,%r8,3 # shift and test for svc 0 - jnz .Lsysc_nr_ok - # svc 0: system call number in %r1 - llgfr %r1,%r1 # clear high word in r1 - sth %r1,__PT_INT_CODE+2(%r11) - cghi %r1,NR_syscalls - jnl .Lsysc_nr_ok - slag %r8,%r1,3 -.Lsysc_nr_ok: - stg %r2,__PT_ORIG_GPR2(%r11) - stg %r7,STACK_FRAME_OVERHEAD(%r15) - lg %r9,0(%r8,%r10) # get system call add. - TSTMSK __TI_flags(%r12),_TIF_TRACE - jnz .Lsysc_tracesys - BASR_EX %r14,%r9 # call sys_xxxx - stg %r2,__PT_R2(%r11) # store return value - -.Lsysc_return: -#ifdef CONFIG_DEBUG_RSEQ - lgr %r2,%r11 - brasl %r14,rseq_syscall -#endif - LOCKDEP_SYS_EXIT -.Lsysc_tif: - DISABLE_INTS - TSTMSK __PT_FLAGS(%r11),_PIF_WORK - jnz .Lsysc_work - TSTMSK __TI_flags(%r12),_TIF_WORK - jnz .Lsysc_work # check for work - DEBUG_USER_ASCE + xgr %r1,%r1 + xgr %r4,%r4 + xgr %r5,%r5 + xgr %r6,%r6 + xgr %r7,%r7 + xgr %r8,%r8 + xgr %r9,%r9 + xgr %r10,%r10 + xgr %r11,%r11 + la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs + lgr %r3,%r14 + brasl %r14,__do_syscall lctlg %c1,%c1,__LC_USER_ASCE - BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP - TSTMSK __LC_CPU_FLAGS, _CIF_FPU - jz .Lsysc_skip_fpu - brasl %r14,load_fpu_regs -.Lsysc_skip_fpu: - mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) stpt __LC_EXIT_TIMER - lmg %r0,%r15,__PT_R0(%r11) b __LC_RETURN_LPSWE - -# -# One of the work bits is on. Find out which one. -# -.Lsysc_work: - ENABLE_INTS - TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED - jo .Lsysc_reschedule - TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART - jo .Lsysc_syscall_restart -#ifdef CONFIG_UPROBES - TSTMSK __TI_flags(%r12),_TIF_UPROBE - jo .Lsysc_uprobe_notify -#endif - TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE - jo .Lsysc_guarded_storage - TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP - jo .Lsysc_singlestep -#ifdef CONFIG_LIVEPATCH - TSTMSK __TI_flags(%r12),_TIF_PATCH_PENDING - jo .Lsysc_patch_pending # handle live patching just before - # signals and possible syscall restart -#endif - TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART - jo .Lsysc_syscall_restart - TSTMSK __TI_flags(%r12),(_TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL) - jnz .Lsysc_sigpending - TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME - jo .Lsysc_notify_resume - j .Lsysc_return - -# -# _TIF_NEED_RESCHED is set, call schedule -# -.Lsysc_reschedule: - larl %r14,.Lsysc_return - jg schedule - -# -# _TIF_SIGPENDING is set, call do_signal -# -.Lsysc_sigpending: - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_signal - TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL - jno .Lsysc_return -.Lsysc_do_syscall: - lghi %r13,__TASK_thread - lmg %r2,%r7,__PT_R2(%r11) # load svc arguments - lghi %r1,0 # svc 0 returns -ENOSYS - j .Lsysc_do_svc - -# -# _TIF_NOTIFY_RESUME is set, call do_notify_resume -# -.Lsysc_notify_resume: - lgr %r2,%r11 # pass pointer to pt_regs - larl %r14,.Lsysc_return - jg do_notify_resume - -# -# _TIF_UPROBE is set, call uprobe_notify_resume -# -#ifdef CONFIG_UPROBES -.Lsysc_uprobe_notify: - lgr %r2,%r11 # pass pointer to pt_regs - larl %r14,.Lsysc_return - jg uprobe_notify_resume -#endif - -# -# _TIF_GUARDED_STORAGE is set, call guarded_storage_load -# -.Lsysc_guarded_storage: - lgr %r2,%r11 # pass pointer to pt_regs - larl %r14,.Lsysc_return - jg gs_load_bc_cb -# -# _TIF_PATCH_PENDING is set, call klp_update_patch_state -# -#ifdef CONFIG_LIVEPATCH -.Lsysc_patch_pending: - lg %r2,__LC_CURRENT # pass pointer to task struct - larl %r14,.Lsysc_return - jg klp_update_patch_state -#endif - -# -# _PIF_PER_TRAP is set, call do_per_trap -# -.Lsysc_singlestep: - ni __PT_FLAGS+7(%r11),255-_PIF_PER_TRAP - lgr %r2,%r11 # pass pointer to pt_regs - larl %r14,.Lsysc_return - jg do_per_trap - -# -# _PIF_SYSCALL_RESTART is set, repeat the current system call -# -.Lsysc_syscall_restart: - ni __PT_FLAGS+7(%r11),255-_PIF_SYSCALL_RESTART - lmg %r1,%r7,__PT_R1(%r11) # load svc arguments - lg %r2,__PT_ORIG_GPR2(%r11) - j .Lsysc_do_svc - -# -# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before -# and after the system call -# -.Lsysc_tracesys: - lgr %r2,%r11 # pass pointer to pt_regs - la %r3,0 - llgh %r0,__PT_INT_CODE+2(%r11) - stg %r0,__PT_R2(%r11) - brasl %r14,do_syscall_trace_enter - lghi %r0,NR_syscalls - clgr %r0,%r2 - jnh .Lsysc_tracenogo - sllg %r8,%r2,3 - lg %r9,0(%r8,%r10) - lmg %r3,%r7,__PT_R3(%r11) - stg %r7,STACK_FRAME_OVERHEAD(%r15) - lg %r2,__PT_ORIG_GPR2(%r11) - BASR_EX %r14,%r9 # call sys_xxx - stg %r2,__PT_R2(%r11) # store return value -.Lsysc_tracenogo: - TSTMSK __TI_flags(%r12),_TIF_TRACE - jz .Lsysc_return - lgr %r2,%r11 # pass pointer to pt_regs - larl %r14,.Lsysc_return - jg do_syscall_trace_exit ENDPROC(system_call) # # a new process exits the kernel with ret_from_fork # ENTRY(ret_from_fork) - la %r11,STACK_FRAME_OVERHEAD(%r15) - lg %r12,__LC_CURRENT - brasl %r14,schedule_tail - tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ? - jne .Lsysc_tracenogo - # it's a kernel thread - lmg %r9,%r10,__PT_R9(%r11) # load gprs - la %r2,0(%r10) - BASR_EX %r14,%r9 - j .Lsysc_tracenogo + lgr %r3,%r11 + brasl %r14,__ret_from_fork + lctlg %c1,%c1,__LC_USER_ASCE + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + stpt __LC_EXIT_TIMER + b __LC_RETURN_LPSWE ENDPROC(ret_from_fork) -ENTRY(kernel_thread_starter) - la %r2,0(%r10) - BASR_EX %r14,%r9 - j .Lsysc_tracenogo -ENDPROC(kernel_thread_starter) - /* * Program check handler routine */ ENTRY(pgm_check_handler) - stpt __LC_SYNC_ENTER_TIMER + stpt __LC_SYS_ENTER_TIMER BPOFF stmg %r8,%r15,__LC_SAVE_AREA_SYNC - lg %r10,__LC_LAST_BREAK - srag %r11,%r10,12 - jnz 0f - /* if __LC_LAST_BREAK is < 4096, it contains one of - * the lpswe addresses in lowcore. Set it to 1 (initial state) - * to prevent leaking that address to userspace. - */ - lghi %r10,1 -0: lg %r12,__LC_CURRENT - lghi %r11,0 + lg %r12,__LC_CURRENT + lghi %r10,0 lmg %r8,%r9,__LC_PGM_OLD_PSW tmhh %r8,0x0001 # coming from user space? jno .Lpgm_skip_asce lctlg %c1,%c1,__LC_KERNEL_ASCE - j 3f + j 3f # -> fault in user space .Lpgm_skip_asce: #if IS_ENABLED(CONFIG_KVM) # cleanup critical section for program checks in sie64a @@ -653,7 +328,7 @@ ENTRY(pgm_check_handler) ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit - lghi %r11,_PIF_GUEST_FAULT + lghi %r10,_PIF_GUEST_FAULT #endif 1: tmhh %r8,0x4000 # PER bit set in old PSW ? jnz 2f # -> enabled, can't be a double fault @@ -661,109 +336,84 @@ ENTRY(pgm_check_handler) jnz .Lpgm_svcper # -> single stepped svc 2: CHECK_STACK __LC_SAVE_AREA_SYNC aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - # CHECK_VMAP_STACK branches to stack_overflow or 5f - CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,5f -3: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER - BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP + # CHECK_VMAP_STACK branches to stack_overflow or 4f + CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f +3: BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP lg %r15,__LC_KERNEL_STACK - lgr %r14,%r12 - aghi %r14,__TASK_thread # pointer to thread_struct - lghi %r13,__LC_PGM_TDB - tm __LC_PGM_ILC+2,0x02 # check for transaction abort - jz 4f - mvc __THREAD_trap_tdb(256,%r14),0(%r13) -4: stg %r10,__THREAD_last_break(%r14) -5: lgr %r13,%r11 - la %r11,STACK_FRAME_OVERHEAD(%r15) +4: la %r11,STACK_FRAME_OVERHEAD(%r15) + stg %r10,__PT_FLAGS(%r11) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + stmg %r8,%r9,__PT_PSW(%r11) + # clear user controlled registers to prevent speculative use xgr %r0,%r0 xgr %r1,%r1 - xgr %r2,%r2 xgr %r3,%r3 xgr %r4,%r4 xgr %r5,%r5 xgr %r6,%r6 xgr %r7,%r7 - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC - stmg %r8,%r9,__PT_PSW(%r11) - mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC - mvc __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE - stg %r13,__PT_FLAGS(%r11) - stg %r10,__PT_ARGS(%r11) - tm __LC_PGM_ILC+3,0x80 # check for per exception - jz 6f - tmhh %r8,0x0001 # kernel per event ? - jz .Lpgm_kprobe - oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP - mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS - mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE - mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID -6: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - RESTORE_SM_CLEAR_PER - larl %r1,pgm_check_table - llgh %r10,__PT_INT_CODE+2(%r11) - nill %r10,0x007f - sll %r10,3 - je .Lpgm_return - lg %r9,0(%r10,%r1) # load address of handler routine - lgr %r2,%r11 # pass pointer to pt_regs - BASR_EX %r14,%r9 # branch to interrupt-handler -.Lpgm_return: - LOCKDEP_SYS_EXIT - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jno .Lpgm_restore - TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL - jo .Lsysc_do_syscall - j .Lsysc_tif -.Lpgm_restore: - DISABLE_INTS - TSTMSK __LC_CPU_FLAGS, _CIF_FPU - jz .Lpgm_skip_fpu - brasl %r14,load_fpu_regs -.Lpgm_skip_fpu: - mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + lgr %r2,%r11 + brasl %r14,__do_pgm_check + tmhh %r8,0x0001 # returning to user space? + jno .Lpgm_exit_kernel + lctlg %c1,%c1,__LC_USER_ASCE + BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP stpt __LC_EXIT_TIMER - lmg %r0,%r15,__PT_R0(%r11) +.Lpgm_exit_kernel: + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) b __LC_RETURN_LPSWE # -# PER event in supervisor state, must be kprobes -# -.Lpgm_kprobe: - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - RESTORE_SM_CLEAR_PER - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_per_trap - j .Lpgm_return - -# # single stepped system call # .Lpgm_svcper: mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW larl %r14,.Lsysc_per stg %r14,__LC_RETURN_PSW+8 - lghi %r14,_PIF_SYSCALL | _PIF_PER_TRAP + lghi %r14,1 lpswe __LC_RETURN_PSW # branch to .Lsysc_per ENDPROC(pgm_check_handler) /* - * IO interrupt handler routine + * Interrupt handler macro used for external and IO interrupts. */ -ENTRY(io_int_handler) +.macro INT_HANDLER name,lc_old_psw,handler +ENTRY(\name) STCK __LC_INT_CLOCK - stpt __LC_ASYNC_ENTER_TIMER + stpt __LC_SYS_ENTER_TIMER BPOFF stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r12,__LC_CURRENT - lmg %r8,%r9,__LC_IO_OLD_PSW - SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER,__LC_INT_CLOCK + lmg %r8,%r9,\lc_old_psw + tmhh %r8,0x0001 # interrupting from user ? + jnz 1f +#if IS_ENABLED(CONFIG_KVM) + lgr %r14,%r9 + larl %r13,.Lsie_gmap + slgr %r14,%r13 + lghi %r13,.Lsie_done - .Lsie_gmap + clgr %r14,%r13 + jhe 0f + brasl %r14,.Lcleanup_sie_int +#endif +0: CHECK_STACK __LC_SAVE_AREA_ASYNC + lgr %r11,%r15 + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + stg %r11,__SF_BACKCHAIN(%r15) + j 2f +1: BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP + lctlg %c1,%c1,__LC_KERNEL_ASCE + lg %r15,__LC_KERNEL_STACK + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) +2: la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use xgr %r0,%r0 xgr %r1,%r1 - xgr %r2,%r2 xgr %r3,%r3 xgr %r4,%r4 xgr %r5,%r5 @@ -772,323 +422,49 @@ ENTRY(io_int_handler) xgr %r10,%r10 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) - tm __PT_PSW+1(%r11),0x01 # coming from user space? - jno .Lio_skip_asce + tm %r8,0x0001 # coming from user space? + jno 1f lctlg %c1,%c1,__LC_KERNEL_ASCE -.Lio_skip_asce: - mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID - xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - TRACE_IRQS_OFF -.Lio_loop: - lgr %r2,%r11 # pass pointer to pt_regs - lghi %r3,IO_INTERRUPT - tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ? - jz .Lio_call - lghi %r3,THIN_INTERRUPT -.Lio_call: - brasl %r14,do_IRQ - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPAR - jz .Lio_return - tpi 0 - jz .Lio_return - mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID - j .Lio_loop -.Lio_return: - LOCKDEP_SYS_EXIT - TSTMSK __TI_flags(%r12),_TIF_WORK - jnz .Lio_work # there is work to do (signals etc.) - TSTMSK __LC_CPU_FLAGS,_CIF_WORK - jnz .Lio_work -.Lio_restore: - TRACE_IRQS_ON +1: lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,\handler mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jno .Lio_exit_kernel - DEBUG_USER_ASCE + tmhh %r8,0x0001 # returning to user ? + jno 2f lctlg %c1,%c1,__LC_USER_ASCE BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP stpt __LC_EXIT_TIMER -.Lio_exit_kernel: - lmg %r0,%r15,__PT_R0(%r11) +2: lmg %r0,%r15,__PT_R0(%r11) b __LC_RETURN_LPSWE -.Lio_done: +ENDPROC(\name) +.endm -# -# There is work todo, find out in which context we have been interrupted: -# 1) if we return to user space we can do all _TIF_WORK work -# 2) if we return to kernel code and kvm is enabled check if we need to -# modify the psw to leave SIE -# 3) if we return to kernel code and preemptive scheduling is enabled check -# the preemption counter and if it is zero call preempt_schedule_irq -# Before any work can be done, a switch to the kernel stack is required. -# -.Lio_work: - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jo .Lio_work_user # yes -> do resched & signal -#ifdef CONFIG_PREEMPTION - # check for preemptive scheduling - icm %r0,15,__LC_PREEMPT_COUNT - jnz .Lio_restore # preemption is disabled - TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED - jno .Lio_restore - # switch to kernel stack - lg %r1,__PT_R15(%r11) - aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) - la %r11,STACK_FRAME_OVERHEAD(%r1) - lgr %r15,%r1 - brasl %r14,preempt_schedule_irq - j .Lio_return -#else - j .Lio_restore -#endif - -# -# Need to do work before returning to userspace, switch to kernel stack -# -.Lio_work_user: - lg %r1,__LC_KERNEL_STACK - mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) - la %r11,STACK_FRAME_OVERHEAD(%r1) - lgr %r15,%r1 - -# -# One of the work bits is on. Find out which one. -# - TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED - jo .Lio_reschedule -#ifdef CONFIG_LIVEPATCH - TSTMSK __TI_flags(%r12),_TIF_PATCH_PENDING - jo .Lio_patch_pending -#endif - TSTMSK __TI_flags(%r12),(_TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL) - jnz .Lio_sigpending - TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME - jo .Lio_notify_resume - TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE - jo .Lio_guarded_storage - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - jo .Lio_vxrs - j .Lio_return - -# -# CIF_FPU is set, restore floating-point controls and floating-point registers. -# -.Lio_vxrs: - larl %r14,.Lio_return - jg load_fpu_regs - -# -# _TIF_GUARDED_STORAGE is set, call guarded_storage_load -# -.Lio_guarded_storage: - ENABLE_INTS_TRACE - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,gs_load_bc_cb - DISABLE_INTS_TRACE - j .Lio_return - -# -# _TIF_NEED_RESCHED is set, call schedule -# -.Lio_reschedule: - ENABLE_INTS_TRACE - brasl %r14,schedule # call scheduler - DISABLE_INTS_TRACE - j .Lio_return - -# -# _TIF_PATCH_PENDING is set, call klp_update_patch_state -# -#ifdef CONFIG_LIVEPATCH -.Lio_patch_pending: - lg %r2,__LC_CURRENT # pass pointer to task struct - larl %r14,.Lio_return - jg klp_update_patch_state -#endif - -# -# _TIF_SIGPENDING or is set, call do_signal -# -.Lio_sigpending: - ENABLE_INTS_TRACE - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_signal - DISABLE_INTS_TRACE - j .Lio_return - -# -# _TIF_NOTIFY_RESUME or is set, call do_notify_resume -# -.Lio_notify_resume: - ENABLE_INTS_TRACE - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_notify_resume - DISABLE_INTS_TRACE - j .Lio_return -ENDPROC(io_int_handler) - -/* - * External interrupt handler routine - */ -ENTRY(ext_int_handler) - STCK __LC_INT_CLOCK - stpt __LC_ASYNC_ENTER_TIMER - BPOFF - stmg %r8,%r15,__LC_SAVE_AREA_ASYNC - lg %r12,__LC_CURRENT - lmg %r8,%r9,__LC_EXT_OLD_PSW - SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER,__LC_INT_CLOCK - stmg %r0,%r7,__PT_R0(%r11) - # clear user controlled registers to prevent speculative use - xgr %r0,%r0 - xgr %r1,%r1 - xgr %r2,%r2 - xgr %r3,%r3 - xgr %r4,%r4 - xgr %r5,%r5 - xgr %r6,%r6 - xgr %r7,%r7 - xgr %r10,%r10 - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC - stmg %r8,%r9,__PT_PSW(%r11) - tm __PT_PSW+1(%r11),0x01 # coming from user space? - jno .Lext_skip_asce - lctlg %c1,%c1,__LC_KERNEL_ASCE -.Lext_skip_asce: - lghi %r1,__LC_EXT_PARAMS2 - mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR - mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS - mvc __PT_INT_PARM_LONG(8,%r11),0(%r1) - xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - TRACE_IRQS_OFF - lgr %r2,%r11 # pass pointer to pt_regs - lghi %r3,EXT_INTERRUPT - brasl %r14,do_IRQ - j .Lio_return -ENDPROC(ext_int_handler) +INT_HANDLER ext_int_handler,__LC_EXT_OLD_PSW,do_ext_irq +INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq /* * Load idle PSW. */ ENTRY(psw_idle) stg %r3,__SF_EMPTY(%r15) - larl %r1,.Lpsw_idle_exit + larl %r1,psw_idle_exit stg %r1,__SF_EMPTY+8(%r15) larl %r1,smp_cpu_mtid llgf %r1,0(%r1) ltgr %r1,%r1 jz .Lpsw_idle_stcctm - .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+16(%r15) + .insn rsy,0xeb0000000017,%r1,5,__MT_CYCLES_ENTER(%r2) .Lpsw_idle_stcctm: oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT BPON STCK __CLOCK_IDLE_ENTER(%r2) stpt __TIMER_IDLE_ENTER(%r2) lpswe __SF_EMPTY(%r15) -.Lpsw_idle_exit: +.globl psw_idle_exit +psw_idle_exit: BR_EX %r14 ENDPROC(psw_idle) /* - * Store floating-point controls and floating-point or vector register - * depending whether the vector facility is available. A critical section - * cleanup assures that the registers are stored even if interrupted for - * some other work. The CIF_FPU flag is set to trigger a lazy restore - * of the register contents at return from io or a system call. - */ -ENTRY(save_fpu_regs) - stnsm __SF_EMPTY(%r15),0xfc - lg %r2,__LC_CURRENT - aghi %r2,__TASK_thread - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - jo .Lsave_fpu_regs_exit - stfpc __THREAD_FPU_fpc(%r2) - lg %r3,__THREAD_FPU_regs(%r2) - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX - jz .Lsave_fpu_regs_fp # no -> store FP regs - VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3) - VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3) - j .Lsave_fpu_regs_done # -> set CIF_FPU flag -.Lsave_fpu_regs_fp: - std 0,0(%r3) - std 1,8(%r3) - std 2,16(%r3) - std 3,24(%r3) - std 4,32(%r3) - std 5,40(%r3) - std 6,48(%r3) - std 7,56(%r3) - std 8,64(%r3) - std 9,72(%r3) - std 10,80(%r3) - std 11,88(%r3) - std 12,96(%r3) - std 13,104(%r3) - std 14,112(%r3) - std 15,120(%r3) -.Lsave_fpu_regs_done: - oi __LC_CPU_FLAGS+7,_CIF_FPU -.Lsave_fpu_regs_exit: - ssm __SF_EMPTY(%r15) - BR_EX %r14 -.Lsave_fpu_regs_end: -ENDPROC(save_fpu_regs) -EXPORT_SYMBOL(save_fpu_regs) - -/* - * Load floating-point controls and floating-point or vector registers. - * A critical section cleanup assures that the register contents are - * loaded even if interrupted for some other work. - * - * There are special calling conventions to fit into sysc and io return work: - * %r15: <kernel stack> - * The function requires: - * %r4 - */ -load_fpu_regs: - stnsm __SF_EMPTY(%r15),0xfc - lg %r4,__LC_CURRENT - aghi %r4,__TASK_thread - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - jno .Lload_fpu_regs_exit - lfpc __THREAD_FPU_fpc(%r4) - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX - lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area - jz .Lload_fpu_regs_fp # -> no VX, load FP regs - VLM %v0,%v15,0,%r4 - VLM %v16,%v31,256,%r4 - j .Lload_fpu_regs_done -.Lload_fpu_regs_fp: - ld 0,0(%r4) - ld 1,8(%r4) - ld 2,16(%r4) - ld 3,24(%r4) - ld 4,32(%r4) - ld 5,40(%r4) - ld 6,48(%r4) - ld 7,56(%r4) - ld 8,64(%r4) - ld 9,72(%r4) - ld 10,80(%r4) - ld 11,88(%r4) - ld 12,96(%r4) - ld 13,104(%r4) - ld 14,112(%r4) - ld 15,120(%r4) -.Lload_fpu_regs_done: - ni __LC_CPU_FLAGS+7,255-_CIF_FPU -.Lload_fpu_regs_exit: - ssm __SF_EMPTY(%r15) - BR_EX %r14 -.Lload_fpu_regs_end: -ENDPROC(load_fpu_regs) - -/* * Machine check handler routines */ ENTRY(mcck_int_handler) @@ -1146,11 +522,8 @@ ENTRY(mcck_int_handler) mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID jo 3f - la %r14,__LC_SYNC_ENTER_TIMER - clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER - jl 0f - la %r14,__LC_ASYNC_ENTER_TIMER -0: clc 0(8,%r14),__LC_EXIT_TIMER + la %r14,__LC_SYS_ENTER_TIMER + clc 0(8,%r14),__LC_EXIT_TIMER jl 1f la %r14,__LC_EXIT_TIMER 1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER @@ -1165,14 +538,32 @@ ENTRY(mcck_int_handler) TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID jno .Lmcck_panic 4: ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off - SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER,__LC_MCCK_CLOCK + tmhh %r8,0x0001 # interrupting from user ? + jnz .Lmcck_user +#if IS_ENABLED(CONFIG_KVM) + lgr %r14,%r9 + larl %r13,.Lsie_gmap + slgr %r14,%r13 + lghi %r13,.Lsie_done - .Lsie_gmap + clgr %r14,%r13 + jhe .Lmcck_stack + brasl %r14,.Lcleanup_sie_mcck +#endif + j .Lmcck_stack +.Lmcck_user: + BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP +.Lmcck_stack: + lg %r15,__LC_MCCK_STACK .Lmcck_skip: + la %r11,STACK_FRAME_OVERHEAD(%r15) + stctg %c1,%c1,__PT_CR1(%r11) + lctlg %c1,%c1,__LC_KERNEL_ASCE + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lghi %r14,__LC_GPREGS_SAVE_AREA+64 stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use xgr %r0,%r0 xgr %r1,%r1 - xgr %r2,%r2 xgr %r3,%r3 xgr %r4,%r4 xgr %r5,%r5 @@ -1181,9 +572,6 @@ ENTRY(mcck_int_handler) xgr %r10,%r10 mvc __PT_R8(64,%r11),0(%r14) stmg %r8,%r9,__PT_PSW(%r11) - la %r14,4095 - mvc __PT_CR1(8,%r11),__LC_CREGS_SAVE_AREA-4095+8(%r14) - lctlg %c1,%c1,__LC_KERNEL_ASCE xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs @@ -1195,9 +583,7 @@ ENTRY(mcck_int_handler) xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 - TRACE_IRQS_OFF brasl %r14,s390_handle_mcck - TRACE_IRQS_ON .Lmcck_return: lctlg %c1,%c1,__PT_CR1(%r11) lmg %r0,%r10,__PT_R0(%r11) @@ -1211,7 +597,6 @@ ENTRY(mcck_int_handler) .Lmcck_panic: lg %r15,__LC_NODAT_STACK - la %r11,STACK_FRAME_OVERHEAD(%r15) j .Lmcck_skip ENDPROC(mcck_int_handler) @@ -1264,21 +649,20 @@ ENDPROC(stack_overflow) #endif #if IS_ENABLED(CONFIG_KVM) -.Lcleanup_sie: - cghi %r11,__LC_SAVE_AREA_ASYNC #Is this in normal interrupt? - je 1f +.Lcleanup_sie_mcck: larl %r13,.Lsie_entry slgr %r9,%r13 larl %r13,.Lsie_skip clgr %r9,%r13 - jh 1f + jh .Lcleanup_sie_int oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST -1: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) +.Lcleanup_sie_int: + BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_KERNEL_ASCE larl %r9,sie_exit # skip forward to sie_exit - BR_EX %r14,%r11 + BR_EX %r14,%r13 #endif .section .rodata, "a" diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index a16c33b32ab0..3d0c0ac5c20e 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -17,8 +17,9 @@ void io_int_handler(void); void mcck_int_handler(void); void restart_int_handler(void); -asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); -asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); +void __ret_from_fork(struct task_struct *prev, struct pt_regs *regs); +void __do_pgm_check(struct pt_regs *regs); +void __do_syscall(struct pt_regs *regs, int per_trap); void do_protection_exception(struct pt_regs *regs); void do_dat_exception(struct pt_regs *regs); @@ -48,9 +49,7 @@ void translation_exception(struct pt_regs *regs); void vector_exception(struct pt_regs *regs); void monitor_event_exception(struct pt_regs *regs); -void do_per_trap(struct pt_regs *regs); void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str); -void syscall_trace(struct pt_regs *regs, int entryexit); void kernel_stack_overflow(struct pt_regs * regs); void do_signal(struct pt_regs *regs); void handle_signal32(struct ksignal *ksig, sigset_t *oldset, @@ -58,7 +57,8 @@ void handle_signal32(struct ksignal *ksig, sigset_t *oldset, void do_notify_resume(struct pt_regs *regs); void __init init_IRQ(void); -void do_IRQ(struct pt_regs *regs, int irq); +void do_io_irq(struct pt_regs *regs); +void do_ext_irq(struct pt_regs *regs); void do_restart(void); void __init startup_init(void); void die(struct pt_regs *regs, const char *str); @@ -82,8 +82,6 @@ long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user DECLARE_PER_CPU(u64, mt_cycles[8]); -void gs_load_bc_cb(struct pt_regs *regs); - unsigned long stack_alloc(void); void stack_free(unsigned long stack); diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index 0da378e2eb25..d864c9a325e2 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -175,3 +175,91 @@ void __kernel_fpu_end(struct kernel_fpu *state, u32 flags) : "1", "cc"); } EXPORT_SYMBOL(__kernel_fpu_end); + +void __load_fpu_regs(void) +{ + struct fpu *state = ¤t->thread.fpu; + unsigned long *regs = current->thread.fpu.regs; + + asm volatile("lfpc %0" : : "Q" (state->fpc)); + if (likely(MACHINE_HAS_VX)) { + asm volatile("lgr 1,%0\n" + "VLM 0,15,0,1\n" + "VLM 16,31,256,1\n" + : + : "d" (regs) + : "1", "cc", "memory"); + } else { + asm volatile("ld 0,%0" : : "Q" (regs[0])); + asm volatile("ld 1,%0" : : "Q" (regs[1])); + asm volatile("ld 2,%0" : : "Q" (regs[2])); + asm volatile("ld 3,%0" : : "Q" (regs[3])); + asm volatile("ld 4,%0" : : "Q" (regs[4])); + asm volatile("ld 5,%0" : : "Q" (regs[5])); + asm volatile("ld 6,%0" : : "Q" (regs[6])); + asm volatile("ld 7,%0" : : "Q" (regs[7])); + asm volatile("ld 8,%0" : : "Q" (regs[8])); + asm volatile("ld 9,%0" : : "Q" (regs[9])); + asm volatile("ld 10,%0" : : "Q" (regs[10])); + asm volatile("ld 11,%0" : : "Q" (regs[11])); + asm volatile("ld 12,%0" : : "Q" (regs[12])); + asm volatile("ld 13,%0" : : "Q" (regs[13])); + asm volatile("ld 14,%0" : : "Q" (regs[14])); + asm volatile("ld 15,%0" : : "Q" (regs[15])); + } + clear_cpu_flag(CIF_FPU); +} +EXPORT_SYMBOL(__load_fpu_regs); + +void load_fpu_regs(void) +{ + raw_local_irq_disable(); + __load_fpu_regs(); + raw_local_irq_enable(); +} +EXPORT_SYMBOL(load_fpu_regs); + +void save_fpu_regs(void) +{ + unsigned long flags, *regs; + struct fpu *state; + + local_irq_save(flags); + + if (test_cpu_flag(CIF_FPU)) + goto out; + + state = ¤t->thread.fpu; + regs = current->thread.fpu.regs; + + asm volatile("stfpc %0" : "=Q" (state->fpc)); + if (likely(MACHINE_HAS_VX)) { + asm volatile("lgr 1,%0\n" + "VSTM 0,15,0,1\n" + "VSTM 16,31,256,1\n" + : + : "d" (regs) + : "1", "cc", "memory"); + } else { + asm volatile("std 0,%0" : "=Q" (regs[0])); + asm volatile("std 1,%0" : "=Q" (regs[1])); + asm volatile("std 2,%0" : "=Q" (regs[2])); + asm volatile("std 3,%0" : "=Q" (regs[3])); + asm volatile("std 4,%0" : "=Q" (regs[4])); + asm volatile("std 5,%0" : "=Q" (regs[5])); + asm volatile("std 6,%0" : "=Q" (regs[6])); + asm volatile("std 7,%0" : "=Q" (regs[7])); + asm volatile("std 8,%0" : "=Q" (regs[8])); + asm volatile("std 9,%0" : "=Q" (regs[9])); + asm volatile("std 10,%0" : "=Q" (regs[10])); + asm volatile("std 11,%0" : "=Q" (regs[11])); + asm volatile("std 12,%0" : "=Q" (regs[12])); + asm volatile("std 13,%0" : "=Q" (regs[13])); + asm volatile("std 14,%0" : "=Q" (regs[14])); + asm volatile("std 15,%0" : "=Q" (regs[15])); + } + set_cpu_flag(CIF_FPU); +out: + local_irq_restore(flags); +} +EXPORT_SYMBOL(save_fpu_regs); diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index a5d4d80d6ede..812073ea073e 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -14,12 +14,36 @@ #include <linux/cpu.h> #include <linux/sched/cputime.h> #include <trace/events/power.h> +#include <asm/cpu_mf.h> #include <asm/nmi.h> #include <asm/smp.h> #include "entry.h" static DEFINE_PER_CPU(struct s390_idle_data, s390_idle); +void account_idle_time_irq(void) +{ + struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); + u64 cycles_new[8]; + int i; + + clear_cpu_flag(CIF_ENABLED_WAIT); + if (smp_cpu_mtid) { + stcctm(MT_DIAG, smp_cpu_mtid, cycles_new); + for (i = 0; i < smp_cpu_mtid; i++) + this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]); + } + + idle->clock_idle_exit = S390_lowcore.int_clock; + idle->timer_idle_exit = S390_lowcore.sys_enter_timer; + + S390_lowcore.steal_timer += idle->clock_idle_enter - S390_lowcore.last_update_clock; + S390_lowcore.last_update_clock = idle->clock_idle_exit; + + S390_lowcore.system_timer += S390_lowcore.last_update_timer - idle->timer_idle_enter; + S390_lowcore.last_update_timer = idle->timer_idle_exit; +} + void arch_cpu_idle(void) { struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index f8a8b9428ae2..601c21791338 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -21,12 +21,14 @@ #include <linux/init.h> #include <linux/cpu.h> #include <linux/irq.h> +#include <linux/entry-common.h> #include <asm/irq_regs.h> #include <asm/cputime.h> #include <asm/lowcore.h> #include <asm/irq.h> #include <asm/hw_irq.h> #include <asm/stacktrace.h> +#include <asm/softirq_stack.h> #include "entry.h" DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); @@ -95,19 +97,97 @@ static const struct irq_class irqclass_sub_desc[] = { {.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"}, }; -void do_IRQ(struct pt_regs *regs, int irq) +static void do_IRQ(struct pt_regs *regs, int irq) { - struct pt_regs *old_regs; - - old_regs = set_irq_regs(regs); - irq_enter(); if (tod_after_eq(S390_lowcore.int_clock, S390_lowcore.clock_comparator)) /* Serve timer interrupts first. */ clock_comparator_work(); generic_handle_irq(irq); +} + +static int on_async_stack(void) +{ + unsigned long frame = current_frame_address(); + + return !!!((S390_lowcore.async_stack - frame) >> (PAGE_SHIFT + THREAD_SIZE_ORDER)); +} + +static void do_irq_async(struct pt_regs *regs, int irq) +{ + if (on_async_stack()) + do_IRQ(regs, irq); + else + CALL_ON_STACK(do_IRQ, S390_lowcore.async_stack, 2, regs, irq); +} + +static int irq_pending(struct pt_regs *regs) +{ + int cc; + + asm volatile("tpi 0\n" + "ipm %0" : "=d" (cc) : : "cc"); + return cc >> 28; +} + +void noinstr do_io_irq(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + struct pt_regs *old_regs = set_irq_regs(regs); + int from_idle; + + irq_enter(); + + if (user_mode(regs)) + update_timer_sys(); + + from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit; + if (from_idle) + account_idle_time_irq(); + + do { + memcpy(®s->int_code, &S390_lowcore.subchannel_id, 12); + if (S390_lowcore.io_int_word & BIT(31)) + do_irq_async(regs, THIN_INTERRUPT); + else + do_irq_async(regs, IO_INTERRUPT); + } while (MACHINE_IS_LPAR && irq_pending(regs)); + + irq_exit(); + set_irq_regs(old_regs); + irqentry_exit(regs, state); + + if (from_idle) + regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT); +} + +void noinstr do_ext_irq(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + struct pt_regs *old_regs = set_irq_regs(regs); + int from_idle; + + irq_enter(); + + if (user_mode(regs)) + update_timer_sys(); + + memcpy(®s->int_code, &S390_lowcore.ext_cpu_addr, 4); + regs->int_parm = S390_lowcore.ext_params; + regs->int_parm_long = *(unsigned long *)S390_lowcore.ext_params2; + + from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit; + if (from_idle) + account_idle_time_irq(); + + do_irq_async(regs, EXT_INTERRUPT); + irq_exit(); set_irq_regs(old_regs); + irqentry_exit(regs, state); + + if (from_idle) + regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT); } static void show_msi_interrupt(struct seq_file *p, int irq) diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 86c8d5370e7f..11f8c296f60d 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -131,12 +131,11 @@ static notrace void s390_handle_damage(void) NOKPROBE_SYMBOL(s390_handle_damage); /* - * Main machine check handler function. Will be called with interrupts enabled - * or disabled and machine checks enabled or disabled. + * Main machine check handler function. Will be called with interrupts disabled + * and machine checks enabled. */ -void s390_handle_mcck(void) +void __s390_handle_mcck(void) { - unsigned long flags; struct mcck_struct mcck; /* @@ -144,12 +143,10 @@ void s390_handle_mcck(void) * machine checks. Afterwards delete the old state and enable machine * checks again. */ - local_irq_save(flags); local_mcck_disable(); mcck = *this_cpu_ptr(&cpu_mcck); memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck)); local_mcck_enable(); - local_irq_restore(flags); if (mcck.channel_report) crw_handle_channel_report(); @@ -181,8 +178,13 @@ void s390_handle_mcck(void) do_exit(SIGSEGV); } } -EXPORT_SYMBOL_GPL(s390_handle_mcck); +void noinstr s390_handle_mcck(void) +{ + trace_hardirqs_off(); + __s390_handle_mcck(); + trace_hardirqs_on(); +} /* * returns 0 if all required registers are available * returns 1 otherwise @@ -344,6 +346,9 @@ int notrace s390_do_machine_check(struct pt_regs *regs) int mcck_pending = 0; nmi_enter(); + + if (user_mode(regs)) + update_timer_mcck(); inc_irq_stat(NMI_NMI); mci.val = S390_lowcore.mcck_interruption_code; mcck = this_cpu_ptr(&cpu_mcck); diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index e949ab832ed7..db4877bbb9aa 100644 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -2,7 +2,7 @@ /* * Performance event support for s390x - CPU-measurement Counter Sets * - * Copyright IBM Corp. 2019 + * Copyright IBM Corp. 2019, 2021 * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com> * Thomas Richer <tmricht@linux.ibm.com> */ @@ -17,6 +17,8 @@ #include <linux/export.h> #include <linux/slab.h> #include <linux/processor.h> +#include <linux/miscdevice.h> +#include <linux/mutex.h> #include <asm/ctl_reg.h> #include <asm/irq.h> @@ -24,15 +26,20 @@ #include <asm/timex.h> #include <asm/debug.h> -#define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */ +#include <asm/perf_cpum_cf_diag.h> +#define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */ +#define CF_DIAG_MIN_INTERVAL 60 /* Minimum counter set read */ + /* interval in seconds */ +static unsigned long cf_diag_interval = CF_DIAG_MIN_INTERVAL; static unsigned int cf_diag_cpu_speed; static debug_info_t *cf_diag_dbg; -struct cf_diag_csd { /* Counter set data per CPU */ +struct cf_diag_csd { /* Counter set data per CPU */ size_t used; /* Bytes used in data/start */ unsigned char start[PAGE_SIZE]; /* Counter set at event start */ unsigned char data[PAGE_SIZE]; /* Counter set at event delete */ + unsigned int sets; /* # Counter set saved in data */ }; static DEFINE_PER_CPU(struct cf_diag_csd, cf_diag_csd); @@ -118,8 +125,8 @@ static void cf_diag_trailer(struct cf_trailer_entry *te) if (te->cpu_speed) te->speed = 1; te->clock_base = 1; /* Save clock base */ - memcpy(&te->tod_base, &tod_clock_base[1], 8); - store_tod_clock((__u64 *)&te->timestamp); + te->tod_base = tod_clock_base.tod; + te->timestamp = get_tod_clock_fast(); } /* @@ -178,18 +185,35 @@ static void cf_diag_disable(struct pmu *pmu) /* Number of perf events counting hardware events */ static atomic_t cf_diag_events = ATOMIC_INIT(0); +/* Used to avoid races in calling reserve/release_cpumf_hardware */ +static DEFINE_MUTEX(cf_diag_reserve_mutex); /* Release the PMU if event is the last perf event */ static void cf_diag_perf_event_destroy(struct perf_event *event) { debug_sprintf_event(cf_diag_dbg, 5, "%s event %p cpu %d cf_diag_events %d\n", - __func__, event, event->cpu, + __func__, event, smp_processor_id(), atomic_read(&cf_diag_events)); if (atomic_dec_return(&cf_diag_events) == 0) __kernel_cpumcf_end(); } +static int get_authctrsets(void) +{ + struct cpu_cf_events *cpuhw; + unsigned long auth = 0; + enum cpumf_ctr_set i; + + cpuhw = &get_cpu_var(cpu_cf_events); + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i]) + auth |= cpumf_ctr_ctl[i]; + } + put_cpu_var(cpu_cf_events); + return auth; +} + /* Setup the event. Test for authorized counter sets and only include counter * sets which are authorized at the time of the setup. Including unauthorized * counter sets result in specification exception (and panic). @@ -197,15 +221,12 @@ static void cf_diag_perf_event_destroy(struct perf_event *event) static int __hw_perf_event_init(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; - struct cpu_cf_events *cpuhw; - enum cpumf_ctr_set i; int err = 0; debug_sprintf_event(cf_diag_dbg, 5, "%s event %p cpu %d\n", __func__, event, event->cpu); event->hw.config = attr->config; - event->hw.config_base = 0; /* Add all authorized counter sets to config_base. The * the hardware init function is either called per-cpu or just once @@ -215,11 +236,7 @@ static int __hw_perf_event_init(struct perf_event *event) * Checking the authorization on any CPU is fine as the hardware * applies the same authorization settings to all CPUs. */ - cpuhw = &get_cpu_var(cpu_cf_events); - for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) - if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i]) - event->hw.config_base |= cpumf_ctr_ctl[i]; - put_cpu_var(cpu_cf_events); + event->hw.config_base = get_authctrsets(); /* No authorized counter sets, nothing to count/sample */ if (!event->hw.config_base) { @@ -237,6 +254,25 @@ out: return err; } +/* Return 0 if the CPU-measurement counter facility is currently free + * and an error otherwise. + */ +static int cf_diag_perf_event_inuse(void) +{ + int err = 0; + + if (!atomic_inc_not_zero(&cf_diag_events)) { + mutex_lock(&cf_diag_reserve_mutex); + if (atomic_read(&cf_diag_events) == 0 && + __kernel_cpumcf_begin()) + err = -EBUSY; + else + err = atomic_inc_return(&cf_diag_events); + mutex_unlock(&cf_diag_reserve_mutex); + } + return err; +} + static int cf_diag_event_init(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; @@ -264,13 +300,9 @@ static int cf_diag_event_init(struct perf_event *event) } /* Initialize for using the CPU-measurement counter facility */ - if (atomic_inc_return(&cf_diag_events) == 1) { - if (__kernel_cpumcf_begin()) { - atomic_dec(&cf_diag_events); - err = -EBUSY; - goto out; - } - } + err = cf_diag_perf_event_inuse(); + if (err < 0) + goto out; event->destroy = cf_diag_perf_event_destroy; err = __hw_perf_event_init(event); @@ -599,6 +631,8 @@ static void cf_diag_del(struct perf_event *event, int flags) cpuhw->flags &= ~PMU_F_IN_USE; } +/* Default counter set events and format attribute groups */ + CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG); static struct attribute *cf_diag_events_attr[] = { @@ -663,6 +697,452 @@ static void cf_diag_get_cpu_speed(void) } } +/* Code to create device and file I/O operations */ +static atomic_t ctrset_opencnt = ATOMIC_INIT(0); /* Excl. access */ + +static int cf_diag_open(struct inode *inode, struct file *file) +{ + int err = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (atomic_xchg(&ctrset_opencnt, 1)) + return -EBUSY; + + /* Avoid concurrent access with perf_event_open() system call */ + mutex_lock(&cf_diag_reserve_mutex); + if (atomic_read(&cf_diag_events) || __kernel_cpumcf_begin()) + err = -EBUSY; + mutex_unlock(&cf_diag_reserve_mutex); + if (err) { + atomic_set(&ctrset_opencnt, 0); + return err; + } + file->private_data = NULL; + debug_sprintf_event(cf_diag_dbg, 2, "%s\n", __func__); + /* nonseekable_open() never fails */ + return nonseekable_open(inode, file); +} + +/* Variables for ioctl() interface support */ +static DEFINE_MUTEX(cf_diag_ctrset_mutex); +static struct cf_diag_ctrset { + unsigned long ctrset; /* Bit mask of counter set to read */ + cpumask_t mask; /* CPU mask to read from */ + time64_t lastread; /* Epoch counter set last read */ +} cf_diag_ctrset; + +static void cf_diag_ctrset_clear(void) +{ + cpumask_clear(&cf_diag_ctrset.mask); + cf_diag_ctrset.ctrset = 0; +} + +static void cf_diag_release_cpu(void *p) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + + debug_sprintf_event(cf_diag_dbg, 3, "%s cpu %d\n", __func__, + smp_processor_id()); + lcctl(0); /* Reset counter sets */ + cpuhw->state = 0; /* Save state in CPU hardware state */ +} + +/* Release function is also called when application gets terminated without + * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command. + * Since only one application is allowed to open the device, simple stop all + * CPU counter sets. + */ +static int cf_diag_release(struct inode *inode, struct file *file) +{ + on_each_cpu(cf_diag_release_cpu, NULL, 1); + cf_diag_ctrset_clear(); + atomic_set(&ctrset_opencnt, 0); + __kernel_cpumcf_end(); + debug_sprintf_event(cf_diag_dbg, 2, "%s\n", __func__); + return 0; +} + +struct cf_diag_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */ + unsigned int sets; /* Counter set bit mask */ + atomic_t cpus_ack; /* # CPUs successfully executed func */ +}; + +static int cf_diag_all_copy(unsigned long arg, cpumask_t *mask) +{ + struct s390_ctrset_read __user *ctrset_read; + unsigned int cpu, cpus, rc; + void __user *uptr; + + ctrset_read = (struct s390_ctrset_read __user *)arg; + uptr = ctrset_read->data; + for_each_cpu(cpu, mask) { + struct cf_diag_csd *csd = per_cpu_ptr(&cf_diag_csd, cpu); + struct s390_ctrset_cpudata __user *ctrset_cpudata; + + ctrset_cpudata = uptr; + debug_sprintf_event(cf_diag_dbg, 5, "%s cpu %d used %zd\n", + __func__, cpu, csd->used); + rc = put_user(cpu, &ctrset_cpudata->cpu_nr); + rc |= put_user(csd->sets, &ctrset_cpudata->no_sets); + rc |= copy_to_user(ctrset_cpudata->data, csd->data, csd->used); + if (rc) + return -EFAULT; + uptr += sizeof(struct s390_ctrset_cpudata) + csd->used; + cond_resched(); + } + cpus = cpumask_weight(mask); + if (put_user(cpus, &ctrset_read->no_cpus)) + return -EFAULT; + debug_sprintf_event(cf_diag_dbg, 5, "%s copied %ld\n", + __func__, uptr - (void __user *)ctrset_read->data); + return 0; +} + +static size_t cf_diag_cpuset_read(struct s390_ctrset_setdata *p, int ctrset, + int ctrset_size, size_t room) +{ + size_t need = 0; + int rc = -1; + + need = sizeof(*p) + sizeof(u64) * ctrset_size; + debug_sprintf_event(cf_diag_dbg, 5, + "%s room %zd need %zd set %#x set_size %d\n", + __func__, room, need, ctrset, ctrset_size); + if (need <= room) { + p->set = cpumf_ctr_ctl[ctrset]; + p->no_cnts = ctrset_size; + rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv); + if (rc == 3) /* Nothing stored */ + need = 0; + } + debug_sprintf_event(cf_diag_dbg, 5, "%s need %zd rc %d\n", __func__, + need, rc); + return need; +} + +/* Read all counter sets. Since the perf_event_open() system call with + * event cpum_cf_diag/.../ is blocked when this interface is active, reuse + * the perf_event_open() data buffer to store the counter sets. + */ +static void cf_diag_cpu_read(void *parm) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cf_diag_csd *csd = this_cpu_ptr(&cf_diag_csd); + struct cf_diag_call_on_cpu_parm *p = parm; + int set, set_size; + size_t space; + + debug_sprintf_event(cf_diag_dbg, 5, + "%s new %#x flags %#x state %#llx\n", + __func__, p->sets, cpuhw->flags, + cpuhw->state); + /* No data saved yet */ + csd->used = 0; + csd->sets = 0; + memset(csd->data, 0, sizeof(csd->data)); + + /* Scan the counter sets */ + for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) { + struct s390_ctrset_setdata *sp = (void *)csd->data + csd->used; + + if (!(p->sets & cpumf_ctr_ctl[set])) + continue; /* Counter set not in list */ + set_size = cf_diag_ctrset_size(set, &cpuhw->info); + space = sizeof(csd->data) - csd->used; + space = cf_diag_cpuset_read(sp, set, set_size, space); + if (space) { + csd->used += space; + csd->sets += 1; + } + debug_sprintf_event(cf_diag_dbg, 5, "%s sp %px space %zd\n", + __func__, sp, space); + } + debug_sprintf_event(cf_diag_dbg, 5, "%s sets %d used %zd\n", __func__, + csd->sets, csd->used); +} + +static int cf_diag_all_read(unsigned long arg) +{ + struct cf_diag_call_on_cpu_parm p; + cpumask_var_t mask; + time64_t now; + int rc = 0; + + debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__); + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + now = ktime_get_seconds(); + if (cf_diag_ctrset.lastread + cf_diag_interval > now) { + debug_sprintf_event(cf_diag_dbg, 5, "%s now %lld " + " lastread %lld\n", __func__, now, + cf_diag_ctrset.lastread); + rc = -EAGAIN; + goto out; + } else { + cf_diag_ctrset.lastread = now; + } + p.sets = cf_diag_ctrset.ctrset; + cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask); + on_each_cpu_mask(mask, cf_diag_cpu_read, &p, 1); + rc = cf_diag_all_copy(arg, mask); +out: + free_cpumask_var(mask); + debug_sprintf_event(cf_diag_dbg, 5, "%s rc %d\n", __func__, rc); + return rc; +} + +/* Stop all counter sets via ioctl interface */ +static void cf_diag_ioctl_off(void *parm) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cf_diag_call_on_cpu_parm *p = parm; + int rc; + + debug_sprintf_event(cf_diag_dbg, 5, + "%s new %#x flags %#x state %#llx\n", + __func__, p->sets, cpuhw->flags, + cpuhw->state); + + ctr_set_multiple_disable(&cpuhw->state, p->sets); + ctr_set_multiple_stop(&cpuhw->state, p->sets); + rc = lcctl(cpuhw->state); /* Stop counter sets */ + if (!cpuhw->state) + cpuhw->flags &= ~PMU_F_IN_USE; + debug_sprintf_event(cf_diag_dbg, 5, + "%s rc %d flags %#x state %#llx\n", __func__, + rc, cpuhw->flags, cpuhw->state); +} + +/* Start counter sets on particular CPU */ +static void cf_diag_ioctl_on(void *parm) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cf_diag_call_on_cpu_parm *p = parm; + int rc; + + debug_sprintf_event(cf_diag_dbg, 5, + "%s new %#x flags %#x state %#llx\n", + __func__, p->sets, cpuhw->flags, + cpuhw->state); + + if (!(cpuhw->flags & PMU_F_IN_USE)) + cpuhw->state = 0; + cpuhw->flags |= PMU_F_IN_USE; + rc = lcctl(cpuhw->state); /* Reset unused counter sets */ + ctr_set_multiple_enable(&cpuhw->state, p->sets); + ctr_set_multiple_start(&cpuhw->state, p->sets); + rc |= lcctl(cpuhw->state); /* Start counter sets */ + if (!rc) + atomic_inc(&p->cpus_ack); + debug_sprintf_event(cf_diag_dbg, 5, "%s rc %d state %#llx\n", + __func__, rc, cpuhw->state); +} + +static int cf_diag_all_stop(void) +{ + struct cf_diag_call_on_cpu_parm p = { + .sets = cf_diag_ctrset.ctrset, + }; + cpumask_var_t mask; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask); + on_each_cpu_mask(mask, cf_diag_ioctl_off, &p, 1); + free_cpumask_var(mask); + return 0; +} + +static int cf_diag_all_start(void) +{ + struct cf_diag_call_on_cpu_parm p = { + .sets = cf_diag_ctrset.ctrset, + .cpus_ack = ATOMIC_INIT(0), + }; + cpumask_var_t mask; + int rc = 0; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask); + on_each_cpu_mask(mask, cf_diag_ioctl_on, &p, 1); + if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) { + on_each_cpu_mask(mask, cf_diag_ioctl_off, &p, 1); + rc = -EIO; + } + free_cpumask_var(mask); + return rc; +} + +/* Return the maximum required space for all possible CPUs in case one + * CPU will be onlined during the START, READ, STOP cycles. + * To find out the size of the counter sets, any one CPU will do. They + * all have the same counter sets. + */ +static size_t cf_diag_needspace(unsigned int sets) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + size_t bytes = 0; + int i; + + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + if (!(sets & cpumf_ctr_ctl[i])) + continue; + bytes += cf_diag_ctrset_size(i, &cpuhw->info) * sizeof(u64) + + sizeof(((struct s390_ctrset_setdata *)0)->set) + + sizeof(((struct s390_ctrset_setdata *)0)->no_cnts); + } + bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids * + (bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) + + sizeof(((struct s390_ctrset_cpudata *)0)->no_sets)); + debug_sprintf_event(cf_diag_dbg, 5, "%s bytes %ld\n", __func__, + bytes); + return bytes; +} + +static long cf_diag_ioctl_read(unsigned long arg) +{ + struct s390_ctrset_read read; + int ret = 0; + + debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__); + if (copy_from_user(&read, (char __user *)arg, sizeof(read))) + return -EFAULT; + ret = cf_diag_all_read(arg); + debug_sprintf_event(cf_diag_dbg, 5, "%s ret %d\n", __func__, ret); + return ret; +} + +static long cf_diag_ioctl_stop(void) +{ + int ret; + + debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__); + ret = cf_diag_all_stop(); + cf_diag_ctrset_clear(); + debug_sprintf_event(cf_diag_dbg, 5, "%s ret %d\n", __func__, ret); + return ret; +} + +static long cf_diag_ioctl_start(unsigned long arg) +{ + struct s390_ctrset_start __user *ustart; + struct s390_ctrset_start start; + void __user *umask; + unsigned int len; + int ret = 0; + size_t need; + + if (cf_diag_ctrset.ctrset) + return -EBUSY; + ustart = (struct s390_ctrset_start __user *)arg; + if (copy_from_user(&start, ustart, sizeof(start))) + return -EFAULT; + if (start.version != S390_HWCTR_START_VERSION) + return -EINVAL; + if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] | + cpumf_ctr_ctl[CPUMF_CTR_SET_USER] | + cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] | + cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] | + cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG])) + return -EINVAL; /* Invalid counter set */ + if (!start.counter_sets) + return -EINVAL; /* No counter set at all? */ + cpumask_clear(&cf_diag_ctrset.mask); + len = min_t(u64, start.cpumask_len, cpumask_size()); + umask = (void __user *)start.cpumask; + if (copy_from_user(&cf_diag_ctrset.mask, umask, len)) + return -EFAULT; + if (cpumask_empty(&cf_diag_ctrset.mask)) + return -EINVAL; + need = cf_diag_needspace(start.counter_sets); + if (put_user(need, &ustart->data_bytes)) + ret = -EFAULT; + if (ret) + goto out; + cf_diag_ctrset.ctrset = start.counter_sets; + ret = cf_diag_all_start(); +out: + if (ret) + cf_diag_ctrset_clear(); + debug_sprintf_event(cf_diag_dbg, 2, "%s sets %#lx need %ld ret %d\n", + __func__, cf_diag_ctrset.ctrset, need, ret); + return ret; +} + +static long cf_diag_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int ret; + + debug_sprintf_event(cf_diag_dbg, 2, "%s cmd %#x arg %lx\n", __func__, + cmd, arg); + get_online_cpus(); + mutex_lock(&cf_diag_ctrset_mutex); + switch (cmd) { + case S390_HWCTR_START: + ret = cf_diag_ioctl_start(arg); + break; + case S390_HWCTR_STOP: + ret = cf_diag_ioctl_stop(); + break; + case S390_HWCTR_READ: + ret = cf_diag_ioctl_read(arg); + break; + default: + ret = -ENOTTY; + break; + } + mutex_unlock(&cf_diag_ctrset_mutex); + put_online_cpus(); + debug_sprintf_event(cf_diag_dbg, 2, "%s ret %d\n", __func__, ret); + return ret; +} + +static const struct file_operations cf_diag_fops = { + .owner = THIS_MODULE, + .open = cf_diag_open, + .release = cf_diag_release, + .unlocked_ioctl = cf_diag_ioctl, + .compat_ioctl = cf_diag_ioctl, + .llseek = no_llseek +}; + +static struct miscdevice cf_diag_dev = { + .name = S390_HWCTR_DEVICE, + .minor = MISC_DYNAMIC_MINOR, + .fops = &cf_diag_fops, +}; + +static int cf_diag_online_cpu(unsigned int cpu) +{ + struct cf_diag_call_on_cpu_parm p; + + mutex_lock(&cf_diag_ctrset_mutex); + if (!cf_diag_ctrset.ctrset) + goto out; + p.sets = cf_diag_ctrset.ctrset; + cf_diag_ioctl_on(&p); +out: + mutex_unlock(&cf_diag_ctrset_mutex); + return 0; +} + +static int cf_diag_offline_cpu(unsigned int cpu) +{ + struct cf_diag_call_on_cpu_parm p; + + mutex_lock(&cf_diag_ctrset_mutex); + if (!cf_diag_ctrset.ctrset) + goto out; + p.sets = cf_diag_ctrset.ctrset; + cf_diag_ioctl_off(&p); +out: + mutex_unlock(&cf_diag_ctrset_mutex); + return 0; +} + /* Initialize the counter set PMU to generate complete counter set data as * event raw data. This relies on the CPU Measurement Counter Facility device * already being loaded and initialized. @@ -685,21 +1165,43 @@ static int __init cf_diag_init(void) return -ENOMEM; } + rc = misc_register(&cf_diag_dev); + if (rc) { + pr_err("Registration of /dev/" S390_HWCTR_DEVICE + "failed rc=%d\n", rc); + goto out; + } + /* Setup s390dbf facility */ cf_diag_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128); if (!cf_diag_dbg) { pr_err("Registration of s390dbf(cpum_cf_diag) failed\n"); - return -ENOMEM; + rc = -ENOMEM; + goto out_dbf; } debug_register_view(cf_diag_dbg, &debug_sprintf_view); rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1); if (rc) { - debug_unregister_view(cf_diag_dbg, &debug_sprintf_view); - debug_unregister(cf_diag_dbg); pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n", rc); + goto out_perf; } + rc = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_S390_CFD_ONLINE, + "perf/s390/cfd:online", + cf_diag_online_cpu, cf_diag_offline_cpu); + if (!rc) + goto out; + + pr_err("Registration of CPUHP_AP_PERF_S390_CFD_ONLINE failed rc=%i\n", + rc); + perf_pmu_unregister(&cf_diag); +out_perf: + debug_unregister_view(cf_diag_dbg, &debug_sprintf_view); + debug_unregister(cf_diag_dbg); +out_dbf: + misc_deregister(&cf_diag_dev); +out: return rc; } -arch_initcall(cf_diag_init); +device_initcall(cf_diag_init); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 19cd7b961c45..db62def4ef28 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1682,7 +1682,7 @@ static void aux_sdb_init(unsigned long sdb) /* Save clock base */ te->clock_base = 1; - memcpy(&te->progusage2, &tod_clock_base[1], 8); + te->progusage2 = tod_clock_base.tod; } /* diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index bc3ca54edfb4..e20bed1ed34a 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -29,6 +29,7 @@ #include <linux/random.h> #include <linux/export.h> #include <linux/init_task.h> +#include <linux/entry-common.h> #include <asm/cpu_mf.h> #include <asm/io.h> #include <asm/processor.h> @@ -43,9 +44,22 @@ #include <asm/unwind.h> #include "entry.h" -asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); +void ret_from_fork(void) asm("ret_from_fork"); -extern void kernel_thread_starter(void); +void __ret_from_fork(struct task_struct *prev, struct pt_regs *regs) +{ + void (*func)(void *arg); + + schedule_tail(prev); + + if (!user_mode(regs)) { + /* Kernel thread */ + func = (void *)regs->gprs[9]; + func((void *)regs->gprs[10]); + } + clear_pt_regs_flag(regs, PIF_SYSCALL); + syscall_exit_to_user_mode(regs); +} void flush_thread(void) { @@ -108,22 +122,24 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, p->thread.last_break = 1; frame->sf.back_chain = 0; + frame->sf.gprs[5] = (unsigned long)frame + sizeof(struct stack_frame); + frame->sf.gprs[6] = (unsigned long)p; /* new return point is ret_from_fork */ - frame->sf.gprs[8] = (unsigned long) ret_from_fork; + frame->sf.gprs[8] = (unsigned long)ret_from_fork; /* fake return stack for resume(), don't go back to schedule */ - frame->sf.gprs[9] = (unsigned long) frame; + frame->sf.gprs[9] = (unsigned long)frame; /* Store access registers to kernel stack of new process. */ - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { /* kernel thread */ memset(&frame->childregs, 0, sizeof(struct pt_regs)); frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; frame->childregs.psw.addr = - (unsigned long) kernel_thread_starter; + (unsigned long)__ret_from_fork; frame->childregs.gprs[9] = new_stackp; /* function */ frame->childregs.gprs[10] = arg; - frame->childregs.gprs[11] = (unsigned long) do_exit; + frame->childregs.gprs[11] = (unsigned long)do_exit; frame->childregs.orig_gpr2 = -1; return 0; @@ -153,7 +169,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, return 0; } -asmlinkage void execve_tail(void) +void execve_tail(void) { current->thread.fpu.fpc = 0; asm volatile("sfpc %0" : : "d" (0)); diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index a76dd27fb2e8..18b3416fd663 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -7,6 +7,7 @@ * Martin Schwidefsky (schwidefsky@de.ibm.com) */ +#include "asm/ptrace.h" #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> @@ -37,9 +38,6 @@ #include "compat_ptrace.h" #endif -#define CREATE_TRACE_POINTS -#include <trace/events/syscalls.h> - void update_cr_regs(struct task_struct *task) { struct pt_regs *regs = task_pt_regs(task); @@ -140,7 +138,7 @@ void ptrace_disable(struct task_struct *task) memset(&task->thread.per_user, 0, sizeof(task->thread.per_user)); memset(&task->thread.per_event, 0, sizeof(task->thread.per_event)); clear_tsk_thread_flag(task, TIF_SINGLE_STEP); - clear_pt_regs_flag(task_pt_regs(task), PIF_PER_TRAP); + clear_tsk_thread_flag(task, TIF_PER_TRAP); task->thread.per_flags = 0; } @@ -322,25 +320,6 @@ static inline void __poke_user_per(struct task_struct *child, child->thread.per_user.end = data; } -static void fixup_int_code(struct task_struct *child, addr_t data) -{ - struct pt_regs *regs = task_pt_regs(child); - int ilc = regs->int_code >> 16; - u16 insn; - - if (ilc > 6) - return; - - if (ptrace_access_vm(child, regs->psw.addr - (regs->int_code >> 16), - &insn, sizeof(insn), FOLL_FORCE) != sizeof(insn)) - return; - - /* double check that tracee stopped on svc instruction */ - if ((insn >> 8) != 0xa) - return; - - regs->int_code = 0x20000 | (data & 0xffff); -} /* * Write a word to the user area of a process at location addr. This * operation does have an additional problem compared to peek_user. @@ -374,10 +353,12 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) } if (test_pt_regs_flag(regs, PIF_SYSCALL) && - addr == offsetof(struct user, regs.gprs[2])) - fixup_int_code(child, data); - *(addr_t *)((addr_t) ®s->psw + addr) = data; + addr == offsetof(struct user, regs.gprs[2])) { + struct pt_regs *regs = task_pt_regs(child); + regs->int_code = 0x20000 | (data & 0xffff); + } + *(addr_t *)((addr_t) ®s->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { /* * access registers are stored in the thread structure @@ -742,10 +723,12 @@ static int __poke_user_compat(struct task_struct *child, regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | (__u64)(tmp & PSW32_ADDR_AMODE); } else { - if (test_pt_regs_flag(regs, PIF_SYSCALL) && - addr == offsetof(struct compat_user, regs.gprs[2])) - fixup_int_code(child, data); + addr == offsetof(struct compat_user, regs.gprs[2])) { + struct pt_regs *regs = task_pt_regs(child); + + regs->int_code = 0x20000 | (data & 0xffff); + } /* gpr 0-15 */ *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; } @@ -862,82 +845,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, } #endif -asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) -{ - unsigned long mask = -1UL; - long ret = -1; - - if (is_compat_task()) - mask = 0xffffffff; - - /* - * The sysc_tracesys code in entry.S stored the system - * call number to gprs[2]. - */ - if (test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(regs)) { - /* - * Tracing decided this syscall should not happen. Skip - * the system call and the system call restart handling. - */ - goto skip; - } - -#ifdef CONFIG_SECCOMP - /* Do the secure computing check after ptrace. */ - if (unlikely(test_thread_flag(TIF_SECCOMP))) { - struct seccomp_data sd; - - if (is_compat_task()) { - sd.instruction_pointer = regs->psw.addr & 0x7fffffff; - sd.arch = AUDIT_ARCH_S390; - } else { - sd.instruction_pointer = regs->psw.addr; - sd.arch = AUDIT_ARCH_S390X; - } - - sd.nr = regs->int_code & 0xffff; - sd.args[0] = regs->orig_gpr2 & mask; - sd.args[1] = regs->gprs[3] & mask; - sd.args[2] = regs->gprs[4] & mask; - sd.args[3] = regs->gprs[5] & mask; - sd.args[4] = regs->gprs[6] & mask; - sd.args[5] = regs->gprs[7] & mask; - - if (__secure_computing(&sd) == -1) - goto skip; - } -#endif /* CONFIG_SECCOMP */ - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_enter(regs, regs->int_code & 0xffff); - - - audit_syscall_entry(regs->int_code & 0xffff, regs->orig_gpr2 & mask, - regs->gprs[3] &mask, regs->gprs[4] &mask, - regs->gprs[5] &mask); - - if ((signed long)regs->gprs[2] >= NR_syscalls) { - regs->gprs[2] = -ENOSYS; - ret = -ENOSYS; - } - return regs->gprs[2]; -skip: - clear_pt_regs_flag(regs, PIF_SYSCALL); - return ret; -} - -asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) -{ - audit_syscall_exit(regs); - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_exit(regs, regs->gprs[2]); - - if (test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, 0); -} - /* * user_regset definitions. */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 1fbed91c73bc..60da976eee6f 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -338,7 +338,7 @@ int __init arch_early_irq_init(void) return 0; } -static int __init async_stack_realloc(void) +static int __init stack_realloc(void) { unsigned long old, new; @@ -346,11 +346,18 @@ static int __init async_stack_realloc(void) new = stack_alloc(); if (!new) panic("Couldn't allocate async stack"); - S390_lowcore.async_stack = new + STACK_INIT_OFFSET; + WRITE_ONCE(S390_lowcore.async_stack, new + STACK_INIT_OFFSET); free_pages(old, THREAD_SIZE_ORDER); + + old = S390_lowcore.mcck_stack - STACK_INIT_OFFSET; + new = stack_alloc(); + if (!new) + panic("Couldn't allocate machine check stack"); + WRITE_ONCE(S390_lowcore.mcck_stack, new + STACK_INIT_OFFSET); + memblock_free(old, THREAD_SIZE); return 0; } -early_initcall(async_stack_realloc); +early_initcall(stack_realloc); void __init arch_call_rest_init(void) { @@ -372,6 +379,7 @@ void __init arch_call_rest_init(void) static void __init setup_lowcore_dat_off(void) { unsigned long int_psw_mask = PSW_KERNEL_BITS; + unsigned long mcck_stack; struct lowcore *lc; if (IS_ENABLED(CONFIG_KASAN)) @@ -411,8 +419,7 @@ static void __init setup_lowcore_dat_off(void) memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list, sizeof(lc->alt_stfle_fac_list)); nmi_alloc_boot_cpu(lc); - lc->sync_enter_timer = S390_lowcore.sync_enter_timer; - lc->async_enter_timer = S390_lowcore.async_enter_timer; + lc->sys_enter_timer = S390_lowcore.sys_enter_timer; lc->exit_timer = S390_lowcore.exit_timer; lc->user_timer = S390_lowcore.user_timer; lc->system_timer = S390_lowcore.system_timer; @@ -440,6 +447,12 @@ static void __init setup_lowcore_dat_off(void) lc->restart_data = 0; lc->restart_source = -1UL; + mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE); + if (!mcck_stack) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, THREAD_SIZE, THREAD_SIZE); + lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET; + /* Setup absolute zero lowcore */ mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack); mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index b27b6c1f058d..90163e6184f5 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -16,6 +16,7 @@ #include <linux/smp.h> #include <linux/kernel.h> #include <linux/signal.h> +#include <linux/entry-common.h> #include <linux/errno.h> #include <linux/wait.h> #include <linux/ptrace.h> @@ -170,6 +171,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) fpregs_load(&user_sregs.fpregs, ¤t->thread.fpu); clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ + clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); return 0; } @@ -459,7 +461,8 @@ static void handle_signal(struct ksignal *ksig, sigset_t *oldset, * the kernel can handle, and then we build all the user-level signal handling * stack-frames in one go after that. */ -void do_signal(struct pt_regs *regs) + +void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) { struct ksignal ksig; sigset_t *oldset = sigmask_to_save(); @@ -472,7 +475,7 @@ void do_signal(struct pt_regs *regs) current->thread.system_call = test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0; - if (test_thread_flag(TIF_SIGPENDING) && get_signal(&ksig)) { + if (has_signal && get_signal(&ksig)) { /* Whee! Actually deliver the signal. */ if (current->thread.system_call) { regs->int_code = current->thread.system_call; @@ -498,6 +501,7 @@ void do_signal(struct pt_regs *regs) } /* No longer in a system call */ clear_pt_regs_flag(regs, PIF_SYSCALL); + clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); rseq_signal_deliver(&ksig, regs); if (is_compat_task()) handle_signal32(&ksig, oldset, regs); @@ -508,6 +512,7 @@ void do_signal(struct pt_regs *regs) /* No handlers present - check for system call restart */ clear_pt_regs_flag(regs, PIF_SYSCALL); + clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); if (current->thread.system_call) { regs->int_code = current->thread.system_call; switch (regs->gprs[2]) { @@ -520,9 +525,9 @@ void do_signal(struct pt_regs *regs) case -ERESTARTNOINTR: /* Restart system call with magic TIF bit. */ regs->gprs[2] = regs->orig_gpr2; - set_pt_regs_flag(regs, PIF_SYSCALL); + set_pt_regs_flag(regs, PIF_SYSCALL_RESTART); if (test_thread_flag(TIF_SINGLE_STEP)) - clear_pt_regs_flag(regs, PIF_PER_TRAP); + clear_thread_flag(TIF_PER_TRAP); break; } } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 27c763014114..58c8afa3da65 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -30,6 +30,7 @@ #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/irqflags.h> +#include <linux/irq_work.h> #include <linux/cpu.h> #include <linux/slab.h> #include <linux/sched/hotplug.h> @@ -62,6 +63,7 @@ enum { ec_call_function_single, ec_stop_cpu, ec_mcck_pending, + ec_irq_work, }; enum { @@ -189,7 +191,7 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) { - unsigned long async_stack, nodat_stack; + unsigned long async_stack, nodat_stack, mcck_stack; struct lowcore *lc; if (pcpu != &pcpu_devices[0]) { @@ -202,13 +204,15 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) nodat_stack = pcpu->lowcore->nodat_stack - STACK_INIT_OFFSET; } async_stack = stack_alloc(); - if (!async_stack) - goto out; + mcck_stack = stack_alloc(); + if (!async_stack || !mcck_stack) + goto out_stack; lc = pcpu->lowcore; memcpy(lc, &S390_lowcore, 512); memset((char *) lc + 512, 0, sizeof(*lc) - 512); lc->async_stack = async_stack + STACK_INIT_OFFSET; lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET; + lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET; lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); lc->spinlock_index = 0; @@ -216,12 +220,13 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); if (nmi_alloc_per_cpu(lc)) - goto out_async; + goto out_stack; lowcore_ptr[cpu] = lc; pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc); return 0; -out_async: +out_stack: + stack_free(mcck_stack); stack_free(async_stack); out: if (pcpu != &pcpu_devices[0]) { @@ -233,16 +238,18 @@ out: static void pcpu_free_lowcore(struct pcpu *pcpu) { - unsigned long async_stack, nodat_stack, lowcore; + unsigned long async_stack, nodat_stack, mcck_stack, lowcore; nodat_stack = pcpu->lowcore->nodat_stack - STACK_INIT_OFFSET; async_stack = pcpu->lowcore->async_stack - STACK_INIT_OFFSET; + mcck_stack = pcpu->lowcore->mcck_stack - STACK_INIT_OFFSET; lowcore = (unsigned long) pcpu->lowcore; pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); lowcore_ptr[pcpu - pcpu_devices] = NULL; nmi_free_per_cpu(pcpu->lowcore); stack_free(async_stack); + stack_free(mcck_stack); if (pcpu == &pcpu_devices[0]) return; free_pages(nodat_stack, THREAD_SIZE_ORDER); @@ -429,10 +436,12 @@ void notrace smp_yield_cpu(int cpu) */ void notrace smp_emergency_stop(void) { - cpumask_t cpumask; + static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; + static cpumask_t cpumask; u64 end; int cpu; + arch_spin_lock(&lock); cpumask_copy(&cpumask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &cpumask); @@ -453,6 +462,7 @@ void notrace smp_emergency_stop(void) break; cpu_relax(); } + arch_spin_unlock(&lock); } NOKPROBE_SYMBOL(smp_emergency_stop); @@ -499,7 +509,9 @@ static void smp_handle_ext_call(void) if (test_bit(ec_call_function_single, &bits)) generic_smp_call_function_single_interrupt(); if (test_bit(ec_mcck_pending, &bits)) - s390_handle_mcck(); + __s390_handle_mcck(); + if (test_bit(ec_irq_work, &bits)) + irq_work_run(); } static void do_ext_call_interrupt(struct ext_code ext_code, @@ -532,6 +544,13 @@ void smp_send_reschedule(int cpu) pcpu_ec_call(pcpu_devices + cpu, ec_schedule); } +#ifdef CONFIG_IRQ_WORK +void arch_irq_work_raise(void) +{ + pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_irq_work); +} +#endif + /* * parameter area for the set/clear control bit callbacks */ @@ -770,11 +789,13 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail, static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) { struct sclp_core_entry *core; - cpumask_t avail; + static cpumask_t avail; bool configured; u16 core_id; int nr, i; + get_online_cpus(); + mutex_lock(&smp_cpu_state_mutex); nr = 0; cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); /* @@ -795,6 +816,8 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) configured = i < info->configured; nr += smp_add_core(&info->core[i], &avail, configured, early); } + mutex_unlock(&smp_cpu_state_mutex); + put_online_cpus(); return nr; } @@ -842,9 +865,7 @@ void __init smp_detect_cpus(void) pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus); /* Add CPUs present at boot */ - get_online_cpus(); __smp_rescan_cpus(info, true); - put_online_cpus(); memblock_free_early((unsigned long)info, sizeof(*info)); } @@ -1173,11 +1194,7 @@ int __ref smp_rescan_cpus(void) if (!info) return -ENOMEM; smp_get_core_info(info, 0); - get_online_cpus(); - mutex_lock(&smp_cpu_state_mutex); nr = __smp_rescan_cpus(info, false); - mutex_unlock(&smp_cpu_state_mutex); - put_online_cpus(); kfree(info); if (nr) topology_schedule_update(); diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/syscall.c index 202fa73ac167..bc8e650e377d 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/syscall.c @@ -29,6 +29,13 @@ #include <linux/unistd.h> #include <linux/ipc.h> #include <linux/uaccess.h> +#include <linux/string.h> +#include <linux/thread_info.h> +#include <linux/entry-common.h> + +#include <asm/ptrace.h> +#include <asm/vtime.h> + #include "entry.h" /* @@ -100,3 +107,62 @@ SYSCALL_DEFINE0(ni_syscall) { return -ENOSYS; } + +void do_syscall(struct pt_regs *regs) +{ + unsigned long nr; + + nr = regs->int_code & 0xffff; + if (!nr) { + nr = regs->gprs[1] & 0xffff; + regs->int_code &= ~0xffffUL; + regs->int_code |= nr; + } + + regs->gprs[2] = nr; + + nr = syscall_enter_from_user_mode_work(regs, nr); + + /* + * In the s390 ptrace ABI, both the syscall number and the return value + * use gpr2. However, userspace puts the syscall number either in the + * svc instruction itself, or uses gpr1. To make at least skipping syscalls + * work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here + * and if set, the syscall will be skipped. + */ + if (!test_pt_regs_flag(regs, PIF_SYSCALL_RET_SET)) { + regs->gprs[2] = -ENOSYS; + if (likely(nr < NR_syscalls)) + regs->gprs[2] = current->thread.sys_call_table[nr](regs); + } else { + clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET); + } + syscall_exit_to_user_mode_work(regs); +} + +void noinstr __do_syscall(struct pt_regs *regs, int per_trap) +{ + enter_from_user_mode(regs); + + memcpy(®s->gprs[8], S390_lowcore.save_area_sync, 8 * sizeof(unsigned long)); + memcpy(®s->int_code, &S390_lowcore.svc_ilc, sizeof(regs->int_code)); + regs->psw = S390_lowcore.svc_old_psw; + + update_timer_sys(); + + local_irq_enable(); + regs->orig_gpr2 = regs->gprs[2]; + + if (per_trap) + set_thread_flag(TIF_PER_TRAP); + + for (;;) { + regs->flags = 0; + set_pt_regs_flag(regs, PIF_SYSCALL); + do_syscall(regs); + if (!test_pt_regs_flag(regs, PIF_SYSCALL_RESTART)) + break; + local_irq_enable(); + } + exit_to_user_mode(); +} diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index d443423495e5..3abef2144dac 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -444,3 +444,4 @@ 439 common faccessat2 sys_faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr sys_mount_setattr diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index c59cb44fbb7d..06bcfa636638 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -55,11 +55,7 @@ #include <asm/cio.h> #include "entry.h" -unsigned char tod_clock_base[16] __aligned(8) = { - /* Force to data section. */ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -}; +union tod_clock tod_clock_base __section(".data"); EXPORT_SYMBOL_GPL(tod_clock_base); u64 clock_comparator_max = -1ULL; @@ -86,7 +82,7 @@ void __init time_early_init(void) struct ptff_qui qui; /* Initialize TOD steering parameters */ - tod_steering_end = *(unsigned long long *) &tod_clock_base[1]; + tod_steering_end = tod_clock_base.tod; vdso_data->arch_data.tod_steering_end = tod_steering_end; if (!test_facility(28)) @@ -113,18 +109,13 @@ unsigned long long notrace sched_clock(void) } NOKPROBE_SYMBOL(sched_clock); -static void ext_to_timespec64(unsigned char *clk, struct timespec64 *xt) +static void ext_to_timespec64(union tod_clock *clk, struct timespec64 *xt) { - unsigned long long high, low, rem, sec, nsec; + unsigned long rem, sec, nsec; - /* Split extendnd TOD clock to micro-seconds and sub-micro-seconds */ - high = (*(unsigned long long *) clk) >> 4; - low = (*(unsigned long long *)&clk[7]) << 4; - /* Calculate seconds and nano-seconds */ - sec = high; + sec = clk->us; rem = do_div(sec, 1000000); - nsec = (((low >> 32) + (rem << 32)) * 1000) >> 32; - + nsec = ((clk->sus + (rem << 12)) * 125) >> 9; xt->tv_sec = sec; xt->tv_nsec = nsec; } @@ -204,30 +195,26 @@ static void stp_reset(void); void read_persistent_clock64(struct timespec64 *ts) { - unsigned char clk[STORE_CLOCK_EXT_SIZE]; - __u64 delta; + union tod_clock clk; + u64 delta; delta = initial_leap_seconds + TOD_UNIX_EPOCH; - get_tod_clock_ext(clk); - *(__u64 *) &clk[1] -= delta; - if (*(__u64 *) &clk[1] > delta) - clk[0]--; - ext_to_timespec64(clk, ts); + store_tod_clock_ext(&clk); + clk.eitod -= delta; + ext_to_timespec64(&clk, ts); } void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time, struct timespec64 *boot_offset) { - unsigned char clk[STORE_CLOCK_EXT_SIZE]; struct timespec64 boot_time; - __u64 delta; + union tod_clock clk; + u64 delta; delta = initial_leap_seconds + TOD_UNIX_EPOCH; - memcpy(clk, tod_clock_base, STORE_CLOCK_EXT_SIZE); - *(__u64 *)&clk[1] -= delta; - if (*(__u64 *)&clk[1] > delta) - clk[0]--; - ext_to_timespec64(clk, &boot_time); + clk = tod_clock_base; + clk.eitod -= delta; + ext_to_timespec64(&clk, &boot_time); read_persistent_clock64(wall_time); *boot_offset = timespec64_sub(*wall_time, boot_time); @@ -381,10 +368,7 @@ static void clock_sync_global(unsigned long long delta) struct ptff_qto qto; /* Fixup the monotonic sched clock. */ - *(unsigned long long *) &tod_clock_base[1] += delta; - if (*(unsigned long long *) &tod_clock_base[1] < delta) - /* Epoch overflow */ - tod_clock_base[0]++; + tod_clock_base.eitod += delta; /* Adjust TOD steering parameters. */ now = get_tod_clock(); adj = tod_steering_end - now; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index ca47141a5be9..e7ce447651b9 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -62,16 +62,16 @@ static struct mask_info drawer_info; struct cpu_topology_s390 cpu_topology[NR_CPUS]; EXPORT_SYMBOL_GPL(cpu_topology); -static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) +static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int cpu) { - cpumask_t mask; + static cpumask_t mask; cpumask_copy(&mask, cpumask_of(cpu)); switch (topology_mode) { case TOPOLOGY_MODE_HW: while (info) { if (cpumask_test_cpu(cpu, &info->mask)) { - mask = info->mask; + cpumask_copy(&mask, &info->mask); break; } info = info->next; @@ -89,23 +89,24 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) break; } cpumask_and(&mask, &mask, cpu_online_mask); - return mask; + cpumask_copy(dst, &mask); } -static cpumask_t cpu_thread_map(unsigned int cpu) +static void cpu_thread_map(cpumask_t *dst, unsigned int cpu) { - cpumask_t mask; + static cpumask_t mask; int i; cpumask_copy(&mask, cpumask_of(cpu)); if (topology_mode != TOPOLOGY_MODE_HW) - return mask; + goto out; cpu -= cpu % (smp_cpu_mtid + 1); for (i = 0; i <= smp_cpu_mtid; i++) if (cpu_present(cpu + i)) cpumask_set_cpu(cpu + i, &mask); cpumask_and(&mask, &mask, cpu_online_mask); - return mask; +out: + cpumask_copy(dst, &mask); } #define TOPOLOGY_CORE_BITS 64 @@ -250,10 +251,10 @@ void update_cpu_masks(void) for_each_possible_cpu(cpu) { topo = &cpu_topology[cpu]; - topo->thread_mask = cpu_thread_map(cpu); - topo->core_mask = cpu_group_map(&socket_info, cpu); - topo->book_mask = cpu_group_map(&book_info, cpu); - topo->drawer_mask = cpu_group_map(&drawer_info, cpu); + cpu_thread_map(&topo->thread_mask, cpu); + cpu_group_map(&topo->core_mask, &socket_info, cpu); + cpu_group_map(&topo->book_mask, &book_info, cpu); + cpu_group_map(&topo->drawer_mask, &drawer_info, cpu); topo->booted_cores = 0; if (topology_mode != TOPOLOGY_MODE_HW) { id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu; diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 8d1e8a1a97df..db7dd59b570c 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -13,6 +13,8 @@ * 'Traps.c' handles hardware traps and faults after we have saved some * state in 'asm.s'. */ +#include "asm/irqflags.h" +#include "asm/ptrace.h" #include <linux/kprobes.h> #include <linux/kdebug.h> #include <linux/extable.h> @@ -23,7 +25,9 @@ #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/cpu.h> +#include <linux/entry-common.h> #include <asm/fpu/api.h> +#include <asm/vtime.h> #include "entry.h" static inline void __user *get_trap_ip(struct pt_regs *regs) @@ -288,3 +292,64 @@ void __init trap_init(void) local_mcck_enable(); test_monitor_call(); } + +void noinstr __do_pgm_check(struct pt_regs *regs) +{ + unsigned long last_break = S390_lowcore.breaking_event_addr; + unsigned int trapnr, syscall_redirect = 0; + irqentry_state_t state; + + regs->int_code = *(u32 *)&S390_lowcore.pgm_ilc; + regs->int_parm_long = S390_lowcore.trans_exc_code; + + state = irqentry_enter(regs); + + if (user_mode(regs)) { + update_timer_sys(); + if (last_break < 4096) + last_break = 1; + current->thread.last_break = last_break; + regs->args[0] = last_break; + } + + if (S390_lowcore.pgm_code & 0x0200) { + /* transaction abort */ + memcpy(¤t->thread.trap_tdb, &S390_lowcore.pgm_tdb, 256); + } + + if (S390_lowcore.pgm_code & PGM_INT_CODE_PER) { + if (user_mode(regs)) { + struct per_event *ev = ¤t->thread.per_event; + + set_thread_flag(TIF_PER_TRAP); + ev->address = S390_lowcore.per_address; + ev->cause = *(u16 *)&S390_lowcore.per_code; + ev->paid = S390_lowcore.per_access_id; + } else { + /* PER event in kernel is kprobes */ + __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER); + do_per_trap(regs); + goto out; + } + } + + if (!irqs_disabled_flags(regs->psw.mask)) + trace_hardirqs_on(); + __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER); + + trapnr = regs->int_code & PGM_INT_CODE_MASK; + if (trapnr) + pgm_check_table[trapnr](regs); + syscall_redirect = user_mode(regs) && test_pt_regs_flag(regs, PIF_SYSCALL); +out: + local_irq_disable(); + irqentry_exit(regs, state); + + if (syscall_redirect) { + enter_from_user_mode(regs); + local_irq_enable(); + regs->orig_gpr2 = regs->gprs[2]; + do_syscall(regs); + exit_to_user_mode(); + } +} diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index 5007fac01bb5..bbf8622bbf5d 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -32,7 +32,7 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) return -EINVAL; if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) return -EINVAL; - clear_pt_regs_flag(regs, PIF_PER_TRAP); + clear_thread_flag(TIF_PER_TRAP); auprobe->saved_per = psw_bits(regs->psw).per; auprobe->saved_int_code = regs->int_code; regs->int_code = UPROBE_TRAP_NR; @@ -103,7 +103,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) /* fix per address */ current->thread.per_event.address = utask->vaddr; /* trigger per event */ - set_pt_regs_flag(regs, PIF_PER_TRAP); + set_thread_flag(TIF_PER_TRAP); } return 0; } @@ -259,7 +259,7 @@ static void sim_stor_event(struct pt_regs *regs, void *addr, int len) return; current->thread.per_event.address = regs->psw.addr; current->thread.per_event.cause = PER_EVENT_STORE >> 16; - set_pt_regs_flag(regs, PIF_PER_TRAP); + set_thread_flag(TIF_PER_TRAP); } /* diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 8bc269c55fd3..8c4e07d533c8 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -6,186 +6,226 @@ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) */ -#include <linux/init.h> +#include <linux/binfmts.h> +#include <linux/compat.h> +#include <linux/elf.h> #include <linux/errno.h> -#include <linux/sched.h> +#include <linux/init.h> #include <linux/kernel.h> #include <linux/mm.h> -#include <linux/smp.h> -#include <linux/stddef.h> -#include <linux/unistd.h> #include <linux/slab.h> -#include <linux/user.h> -#include <linux/elf.h> -#include <linux/security.h> -#include <linux/memblock.h> -#include <linux/compat.h> -#include <linux/binfmts.h> +#include <linux/smp.h> +#include <linux/time_namespace.h> #include <vdso/datapage.h> -#include <asm/asm-offsets.h> -#include <asm/processor.h> -#include <asm/mmu.h> -#include <asm/mmu_context.h> -#include <asm/sections.h> #include <asm/vdso.h> -#include <asm/facility.h> -#include <asm/timex.h> -extern char vdso64_start, vdso64_end; -static void *vdso64_kbase = &vdso64_start; -static unsigned int vdso64_pages; -static struct page **vdso64_pagelist; +extern char vdso64_start[], vdso64_end[]; +static unsigned int vdso_pages; + +static struct vm_special_mapping vvar_mapping; + +static union { + struct vdso_data data[CS_BASES]; + u8 page[PAGE_SIZE]; +} vdso_data_store __page_aligned_data; + +struct vdso_data *vdso_data = vdso_data_store.data; + +enum vvar_pages { + VVAR_DATA_PAGE_OFFSET, + VVAR_TIMENS_PAGE_OFFSET, + VVAR_NR_PAGES, +}; -/* - * Should the kernel map a VDSO page into processes and pass its - * address down to glibc upon exec()? - */ unsigned int __read_mostly vdso_enabled = 1; -static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) +static int __init vdso_setup(char *str) +{ + bool enabled; + + if (!kstrtobool(str, &enabled)) + vdso_enabled = enabled; + return 1; +} +__setup("vdso=", vdso_setup); + +#ifdef CONFIG_TIME_NS +struct vdso_data *arch_get_vdso_data(void *vvar_page) +{ + return (struct vdso_data *)(vvar_page); +} + +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) { - struct page **vdso_pagelist; - unsigned long vdso_pages; + if (likely(vma->vm_mm == current->mm)) + return current->nsproxy->time_ns->vvar_page; + /* + * VM_PFNMAP | VM_IO protect .fault() handler from being called + * through interfaces like /proc/$pid/mem or + * process_vm_{readv,writev}() as long as there's no .access() + * in special_mapping_vmops(). + * For more details check_vma_flags() and __access_remote_vm() + */ + WARN(1, "vvar_page accessed remotely"); + return NULL; +} - vdso_pagelist = vdso64_pagelist; - vdso_pages = vdso64_pages; +/* + * The VVAR page layout depends on whether a task belongs to the root or + * non-root time namespace. Whenever a task changes its namespace, the VVAR + * page tables are cleared and then they will be re-faulted with a + * corresponding layout. + * See also the comment near timens_setup_vdso_data() for details. + */ +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma; - if (vmf->pgoff >= vdso_pages) - return VM_FAULT_SIGBUS; + mmap_read_lock(mm); + for (vma = mm->mmap; vma; vma = vma->vm_next) { + unsigned long size = vma->vm_end - vma->vm_start; - vmf->page = vdso_pagelist[vmf->pgoff]; - get_page(vmf->page); + if (!vma_is_special_mapping(vma, &vvar_mapping)) + continue; + zap_page_range(vma, vma->vm_start, size); + break; + } + mmap_read_unlock(mm); return 0; } +#else +static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} +#endif + +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct page *timens_page = find_timens_vvar_page(vma); + unsigned long addr, pfn; + vm_fault_t err; + + switch (vmf->pgoff) { + case VVAR_DATA_PAGE_OFFSET: + pfn = virt_to_pfn(vdso_data); + if (timens_page) { + /* + * Fault in VVAR page too, since it will be accessed + * to get clock data anyway. + */ + addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE; + err = vmf_insert_pfn(vma, addr, pfn); + if (unlikely(err & VM_FAULT_ERROR)) + return err; + pfn = page_to_pfn(timens_page); + } + break; +#ifdef CONFIG_TIME_NS + case VVAR_TIMENS_PAGE_OFFSET: + /* + * If a task belongs to a time namespace then a namespace + * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and + * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET + * offset. + * See also the comment near timens_setup_vdso_data(). + */ + if (!timens_page) + return VM_FAULT_SIGBUS; + pfn = virt_to_pfn(vdso_data); + break; +#endif /* CONFIG_TIME_NS */ + default: + return VM_FAULT_SIGBUS; + } + return vmf_insert_pfn(vma, vmf->address, pfn); +} static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *vma) { current->mm->context.vdso_base = vma->vm_start; - return 0; } -static const struct vm_special_mapping vdso_mapping = { +static struct vm_special_mapping vvar_mapping = { + .name = "[vvar]", + .fault = vvar_fault, +}; + +static struct vm_special_mapping vdso_mapping = { .name = "[vdso]", - .fault = vdso_fault, .mremap = vdso_mremap, }; -static int __init vdso_setup(char *str) -{ - bool enabled; - - if (!kstrtobool(str, &enabled)) - vdso_enabled = enabled; - return 1; -} -__setup("vdso=", vdso_setup); - -/* - * The vdso data page - */ -static union { - struct vdso_data data; - u8 page[PAGE_SIZE]; -} vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = (struct vdso_data *)&vdso_data_store.data; - -void vdso_getcpu_init(void) +int vdso_getcpu_init(void) { set_tod_programmable_field(smp_processor_id()); + return 0; } +early_initcall(vdso_getcpu_init); /* Must be called before SMP init */ -/* - * This is called from binfmt_elf, we create the special vma for the - * vDSO and insert it into the mm struct tree - */ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { + unsigned long vdso_text_len, vdso_mapping_len; + unsigned long vvar_start, vdso_text_start; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long vdso_pages; - unsigned long vdso_base; int rc; - if (!vdso_enabled) - return 0; - - if (is_compat_task()) - return 0; - - vdso_pages = vdso64_pages; - /* - * vDSO has a problem and was disabled, just don't "enable" it for - * the process - */ - if (vdso_pages == 0) + BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + if (!vdso_enabled || is_compat_task()) return 0; - - /* - * pick a base address for the vDSO in process space. We try to put - * it at vdso_base which is the "natural" base for it, but we might - * fail and end up putting it elsewhere. - */ if (mmap_write_lock_killable(mm)) return -EINTR; - vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0); - if (IS_ERR_VALUE(vdso_base)) { - rc = vdso_base; - goto out_up; - } - - /* - * our vma flags don't have VM_WRITE so by default, the process - * isn't allowed to write those pages. - * gdb can break that with ptrace interface, and thus trigger COW - * on those pages but it's then your responsibility to never do that - * on the "data" page of the vDSO or you'll stop getting kernel - * updates and your nice userland gettimeofday will be totally dead. - * It's fine to use that for setting breakpoints in the vDSO code - * pages though. - */ - vma = _install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, + vdso_text_len = vdso_pages << PAGE_SHIFT; + vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE; + vvar_start = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); + rc = vvar_start; + if (IS_ERR_VALUE(vvar_start)) + goto out; + vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE, + VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| + VM_PFNMAP, + &vvar_mapping); + rc = PTR_ERR(vma); + if (IS_ERR(vma)) + goto out; + vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE; + /* VM_MAYWRITE for COW so gdb can set breakpoints */ + vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, &vdso_mapping); if (IS_ERR(vma)) { + do_munmap(mm, vvar_start, PAGE_SIZE, NULL); rc = PTR_ERR(vma); - goto out_up; + } else { + current->mm->context.vdso_base = vdso_text_start; + rc = 0; } - - current->mm->context.vdso_base = vdso_base; - rc = 0; - -out_up: +out: mmap_write_unlock(mm); return rc; } static int __init vdso_init(void) { + struct page **pages; int i; - vdso_getcpu_init(); - /* Calculate the size of the 64 bit vDSO */ - vdso64_pages = ((&vdso64_end - &vdso64_start - + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; - - /* Make sure pages are in the correct state */ - vdso64_pagelist = kcalloc(vdso64_pages + 1, sizeof(struct page *), - GFP_KERNEL); - BUG_ON(vdso64_pagelist == NULL); - for (i = 0; i < vdso64_pages - 1; i++) { - struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); - get_page(pg); - vdso64_pagelist[i] = pg; + vdso_pages = (vdso64_end - vdso64_start) >> PAGE_SHIFT; + pages = kcalloc(vdso_pages + 1, sizeof(struct page *), GFP_KERNEL); + if (!pages) { + vdso_enabled = 0; + return -ENOMEM; } - vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); - vdso64_pagelist[vdso64_pages] = NULL; - - get_page(virt_to_page(vdso_data)); - + for (i = 0; i < vdso_pages; i++) + pages[i] = virt_to_page(vdso64_start + i * PAGE_SIZE); + pages[vdso_pages] = NULL; + vdso_mapping.pages = pages; return 0; } -early_initcall(vdso_init); +arch_initcall(vdso_init); diff --git a/arch/s390/kernel/vdso64/getcpu.c b/arch/s390/kernel/vdso64/getcpu.c index 5b2bc7494d5b..5c5d4a848b76 100644 --- a/arch/s390/kernel/vdso64/getcpu.c +++ b/arch/s390/kernel/vdso64/getcpu.c @@ -8,12 +8,12 @@ int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) { - __u16 todval[8]; + union tod_clock clk; /* CPU number is stored in the programmable field of the TOD clock */ - get_tod_clock_ext((char *)todval); + store_tod_clock_ext(&clk); if (cpu) - *cpu = todval[7]; + *cpu = clk.pf; /* NUMA node is always zero */ if (node) *node = 0; diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index 7bde3909290f..518f1ea405f4 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -13,6 +13,10 @@ ENTRY(_start) SECTIONS { + PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); +#ifdef CONFIG_TIME_NS + PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); +#endif . = VDSO64_LBASE + SIZEOF_HEADERS; .hash : { *(.hash) } :text @@ -94,9 +98,6 @@ SECTIONS .debug_ranges 0 : { *(.debug_ranges) } .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } - . = ALIGN(PAGE_SIZE); - PROVIDE(_vdso_data = .); - /DISCARD/ : { *(.note.GNU-stack) *(.branch_lt) diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 5aaa2ca6a928..73c7afcc0527 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -13,7 +13,7 @@ #include <linux/timex.h> #include <linux/types.h> #include <linux/time.h> - +#include <asm/alternative.h> #include <asm/vtimer.h> #include <asm/vtime.h> #include <asm/cpu_mf.h> @@ -128,15 +128,13 @@ static int do_account_vtime(struct task_struct *tsk) timer = S390_lowcore.last_update_timer; clock = S390_lowcore.last_update_clock; - asm volatile( - " stpt %0\n" /* Store current cpu timer value */ -#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES - " stckf %1" /* Store current tod clock value */ -#else - " stck %1" /* Store current tod clock value */ -#endif - : "=Q" (S390_lowcore.last_update_timer), - "=Q" (S390_lowcore.last_update_clock)); + /* Use STORE CLOCK by default, STORE CLOCK FAST if available. */ + alternative_io("stpt %0\n .insn s,0xb2050000,%1\n", + "stpt %0\n .insn s,0xb27c0000,%1\n", + 25, + ASM_OUTPUT2("=Q" (S390_lowcore.last_update_timer), + "=Q" (S390_lowcore.last_update_clock)), + ASM_NO_INPUT_CLOBBER("cc")); clock = S390_lowcore.last_update_clock - clock; timer -= S390_lowcore.last_update_timer; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index dbafd057ca6a..2f09e9d7dc95 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -45,6 +45,7 @@ #include <asm/timex.h> #include <asm/ap.h> #include <asm/uv.h> +#include <asm/fpu/api.h> #include "kvm-s390.h" #include "gaccess.h" @@ -164,12 +165,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -struct kvm_s390_tod_clock_ext { - __u8 epoch_idx; - __u64 tod; - __u8 reserved[7]; -} __packed; - /* allow nested virtualization in KVM (if enabled by user space) */ static int nested; module_param(nested, int, S_IRUGO); @@ -1165,17 +1160,17 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) static void kvm_s390_get_tod_clock(struct kvm *kvm, struct kvm_s390_vm_tod_clock *gtod) { - struct kvm_s390_tod_clock_ext htod; + union tod_clock clk; preempt_disable(); - get_tod_clock_ext((char *)&htod); + store_tod_clock_ext(&clk); - gtod->tod = htod.tod + kvm->arch.epoch; + gtod->tod = clk.tod + kvm->arch.epoch; gtod->epoch_idx = 0; if (test_kvm_facility(kvm, 139)) { - gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx; - if (gtod->tod < htod.tod) + gtod->epoch_idx = clk.ei + kvm->arch.epdx; + if (gtod->tod < clk.tod) gtod->epoch_idx += 1; } @@ -3866,18 +3861,18 @@ void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) { struct kvm_vcpu *vcpu; - struct kvm_s390_tod_clock_ext htod; + union tod_clock clk; int i; mutex_lock(&kvm->lock); preempt_disable(); - get_tod_clock_ext((char *)&htod); + store_tod_clock_ext(&clk); - kvm->arch.epoch = gtod->tod - htod.tod; + kvm->arch.epoch = gtod->tod - clk.tod; kvm->arch.epdx = 0; if (test_kvm_facility(kvm, 139)) { - kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; + kvm->arch.epdx = gtod->epoch_idx - clk.ei; if (kvm->arch.epoch > gtod->tod) kvm->arch.epdx -= 1; } @@ -4147,6 +4142,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) vcpu->run->s.regs.gprs, sizeof(sie_page->pv_grregs)); } + if (test_cpu_flag(CIF_FPU)) + load_fpu_regs(); exit_reason = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); if (kvm_s390_pv_cpu_is_protected(vcpu)) { diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index c5d0a58b2c29..bd803e091918 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -18,6 +18,7 @@ #include <asm/sclp.h> #include <asm/nmi.h> #include <asm/dis.h> +#include <asm/fpu/api.h> #include "kvm-s390.h" #include "gaccess.h" @@ -1028,6 +1029,8 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) */ vcpu->arch.sie_block->prog0c |= PROG_IN_SIE; barrier(); + if (test_cpu_flag(CIF_FPU)) + load_fpu_regs(); if (!kvm_s390_vcpu_sie_inhibited(vcpu)) rc = sie64a(scb_s, vcpu->run->s.regs.gprs); barrier(); diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index e8f642446fed..2fece1fd210a 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -16,8 +16,8 @@ #include <asm/mmu_context.h> #include <asm/facility.h> -#ifdef CONFIG_DEBUG_USER_ASCE -void debug_user_asce(void) +#ifdef CONFIG_DEBUG_ENTRY +void debug_user_asce(int exit) { unsigned long cr1, cr7; @@ -25,12 +25,14 @@ void debug_user_asce(void) __ctl_store(cr7, 7, 7); if (cr1 == S390_lowcore.kernel_asce && cr7 == S390_lowcore.user_asce) return; - panic("incorrect ASCE on kernel exit\n" + panic("incorrect ASCE on kernel %s\n" "cr1: %016lx cr7: %016lx\n" "kernel: %016llx user: %016llx\n", - cr1, cr7, S390_lowcore.kernel_asce, S390_lowcore.user_asce); + exit ? "exit" : "entry", cr1, cr7, + S390_lowcore.kernel_asce, S390_lowcore.user_asce); + } -#endif /*CONFIG_DEBUG_USER_ASCE */ +#endif /*CONFIG_DEBUG_ENTRY */ #ifndef CONFIG_HAVE_MARCH_Z10_FEATURES static DEFINE_STATIC_KEY_FALSE(have_mvcos); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index b8210103de14..e30c7c781172 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -385,7 +385,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) * The instruction that caused the program check has * been nullified. Don't signal single step via SIGTRAP. */ - clear_pt_regs_flag(regs, PIF_PER_TRAP); + clear_thread_flag(TIF_PER_TRAP); if (kprobe_page_fault(regs, 14)) return 0; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 73a163065b95..0e76b2127dc6 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -297,6 +297,7 @@ int arch_add_memory(int nid, u64 start, u64 size, if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot)) return -EINVAL; + VM_BUG_ON(!mhp_range_allowed(start, size, true)); rc = vmem_add_mapping(start, size); if (rc) return rc; diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 4e87c819ddea..781965f7210e 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -58,7 +58,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm) if (!page) return NULL; arch_set_page_dat(page, 2); - return (unsigned long *) page_to_phys(page); + return (unsigned long *) page_to_virt(page); } void crst_table_free(struct mm_struct *mm, unsigned long *table) @@ -161,7 +161,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm) page = alloc_page(GFP_KERNEL); if (page) { - table = (u64 *)page_to_phys(page); + table = (u64 *)page_to_virt(page); memset64(table, _PAGE_INVALID, PTRS_PER_PTE); memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE); } @@ -194,7 +194,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) mask = atomic_read(&page->_refcount) >> 24; mask = (mask | (mask >> 4)) & 3; if (mask != 3) { - table = (unsigned long *) page_to_phys(page); + table = (unsigned long *) page_to_virt(page); bit = mask & 1; /* =1 -> second 2K */ if (bit) table += PTRS_PER_PTE; @@ -217,7 +217,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) } arch_set_page_dat(page, 0); /* Initialize page table */ - table = (unsigned long *) page_to_phys(page); + table = (unsigned long *) page_to_virt(page); if (mm_alloc_pgste(mm)) { /* Return 4K page table with PGSTEs */ atomic_xor_bits(&page->_refcount, 3 << 24); @@ -239,10 +239,10 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) struct page *page; unsigned int bit, mask; - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + page = virt_to_page(table); if (!mm_alloc_pgste(mm)) { /* Free 2K page table fragment of a 4K page */ - bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); + bit = ((unsigned long) table & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); spin_lock_bh(&mm->context.lock); mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24)); mask >>= 24; @@ -269,14 +269,14 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, unsigned int bit, mask; mm = tlb->mm; - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + page = virt_to_page(table); if (mm_alloc_pgste(mm)) { gmap_unlink(mm, table, vmaddr); - table = (unsigned long *) (__pa(table) | 3); + table = (unsigned long *) ((unsigned long)table | 3); tlb_remove_table(tlb, table); return; } - bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); + bit = ((unsigned long) table & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); spin_lock_bh(&mm->context.lock); mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24)); mask >>= 24; @@ -285,7 +285,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, else list_del(&page->lru); spin_unlock_bh(&mm->context.lock); - table = (unsigned long *) (__pa(table) | (1U << bit)); + table = (unsigned long *) ((unsigned long) table | (1U << bit)); tlb_remove_table(tlb, table); } @@ -293,7 +293,7 @@ void __tlb_remove_table(void *_table) { unsigned int mask = (unsigned long) _table & 3; void *table = (void *)((unsigned long) _table ^ mask); - struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + struct page *page = virt_to_page(table); switch (mask) { case 0: /* pmd, pud, or p4d */ diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 01f3a5f58e64..96897fab89dc 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -4,6 +4,7 @@ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> */ +#include <linux/memory_hotplug.h> #include <linux/memblock.h> #include <linux/pfn.h> #include <linux/mm.h> @@ -26,14 +27,14 @@ static void __ref *vmem_alloc_pages(unsigned int order) if (slab_is_available()) return (void *)__get_free_pages(GFP_KERNEL, order); - return (void *) memblock_phys_alloc(size, size); + return memblock_alloc(size, size); } static void vmem_free_pages(unsigned long addr, int order) { /* We don't expect boot memory to be removed ever. */ if (!slab_is_available() || - WARN_ON_ONCE(PageReserved(phys_to_page(addr)))) + WARN_ON_ONCE(PageReserved(virt_to_page(addr)))) return; free_pages(addr, order); } @@ -56,7 +57,7 @@ pte_t __ref *vmem_pte_alloc(void) if (slab_is_available()) pte = (pte_t *) page_table_alloc(&init_mm); else - pte = (pte_t *) memblock_phys_alloc(size, size); + pte = (pte_t *) memblock_alloc(size, size); if (!pte) return NULL; memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); @@ -84,7 +85,7 @@ static void vmemmap_flush_unused_sub_pmd(void) { if (!unused_sub_pmd_start) return; - memset(__va(unused_sub_pmd_start), PAGE_UNUSED, + memset((void *)unused_sub_pmd_start, PAGE_UNUSED, ALIGN(unused_sub_pmd_start, PMD_SIZE) - unused_sub_pmd_start); unused_sub_pmd_start = 0; } @@ -97,7 +98,7 @@ static void vmemmap_mark_sub_pmd_used(unsigned long start, unsigned long end) * getting removed (just in case the memmap never gets initialized, * e.g., because the memory block never gets onlined). */ - memset(__va(start), 0, sizeof(struct page)); + memset((void *)start, 0, sizeof(struct page)); } static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end) @@ -118,7 +119,7 @@ static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end) static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) { - void *page = __va(ALIGN_DOWN(start, PMD_SIZE)); + unsigned long page = ALIGN_DOWN(start, PMD_SIZE); vmemmap_flush_unused_sub_pmd(); @@ -127,7 +128,7 @@ static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) /* Mark the unused parts of the new memmap page PAGE_UNUSED. */ if (!IS_ALIGNED(start, PMD_SIZE)) - memset(page, PAGE_UNUSED, start - __pa(page)); + memset((void *)page, PAGE_UNUSED, start - page); /* * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of * consecutive sections. Remember for the last added PMD the last @@ -140,11 +141,11 @@ static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) /* Returns true if the PMD is completely unused and can be freed. */ static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end) { - void *page = __va(ALIGN_DOWN(start, PMD_SIZE)); + unsigned long page = ALIGN_DOWN(start, PMD_SIZE); vmemmap_flush_unused_sub_pmd(); - memset(__va(start), PAGE_UNUSED, end - start); - return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE); + memset((void *)start, PAGE_UNUSED, end - start); + return !memchr_inv((void *)page, PAGE_UNUSED, PMD_SIZE); } /* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */ @@ -165,7 +166,7 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, if (pte_none(*pte)) continue; if (!direct) - vmem_free_pages(pfn_to_phys(pte_pfn(*pte)), 0); + vmem_free_pages((unsigned long) pfn_to_virt(pte_pfn(*pte)), 0); pte_clear(&init_mm, addr, pte); } else if (pte_none(*pte)) { if (!direct) { @@ -175,7 +176,7 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, goto out; pte_val(*pte) = __pa(new_page) | prot; } else { - pte_val(*pte) = addr | prot; + pte_val(*pte) = __pa(addr) | prot; } } else { continue; @@ -200,7 +201,7 @@ static void try_free_pte_table(pmd_t *pmd, unsigned long start) if (!pte_none(*pte)) return; } - vmem_pte_free(__va(pmd_deref(*pmd))); + vmem_pte_free((unsigned long *) pmd_deref(*pmd)); pmd_clear(pmd); } @@ -241,7 +242,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, IS_ALIGNED(next, PMD_SIZE) && MACHINE_HAS_EDAT1 && addr && direct && !debug_pagealloc_enabled()) { - pmd_val(*pmd) = addr | prot; + pmd_val(*pmd) = __pa(addr) | prot; pages++; continue; } else if (!direct && MACHINE_HAS_EDAT1) { @@ -337,7 +338,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, IS_ALIGNED(next, PUD_SIZE) && MACHINE_HAS_EDAT2 && addr && direct && !debug_pagealloc_enabled()) { - pud_val(*pud) = addr | prot; + pud_val(*pud) = __pa(addr) | prot; pages++; continue; } @@ -532,11 +533,22 @@ void vmem_remove_mapping(unsigned long start, unsigned long size) mutex_unlock(&vmem_mutex); } +struct range arch_get_mappable_range(void) +{ + struct range mhp_range; + + mhp_range.start = 0; + mhp_range.end = VMEM_MAX_PHYS - 1; + return mhp_range; +} + int vmem_add_mapping(unsigned long start, unsigned long size) { + struct range range = arch_get_mappable_range(); int ret; - if (start + size > VMEM_MAX_PHYS || + if (start < range.start || + start + size > range.end + 1 || start + size < start) return -ERANGE; diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 0a4182792876..f973e2ead197 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1205,18 +1205,23 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, jit->seen |= SEEN_MEM; break; /* - * BPF_STX XADD (atomic_add) + * BPF_ATOMIC */ - case BPF_STX | BPF_XADD | BPF_W: /* *(u32 *)(dst + off) += src */ - /* laal %w0,%src,off(%dst) */ - EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W0, src_reg, - dst_reg, off); - jit->seen |= SEEN_MEM; - break; - case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */ - /* laalg %w0,%src,off(%dst) */ - EMIT6_DISP_LH(0xeb000000, 0x00ea, REG_W0, src_reg, - dst_reg, off); + case BPF_STX | BPF_ATOMIC | BPF_DW: + case BPF_STX | BPF_ATOMIC | BPF_W: + if (insn->imm != BPF_ADD) { + pr_err("Unknown atomic operation %02x\n", insn->imm); + return -1; + } + + /* *(u32/u64 *)(dst + off) += src + * + * BFW_W: laal %w0,%src,off(%dst) + * BPF_DW: laalg %w0,%src,off(%dst) + */ + EMIT6_DISP_LH(0xeb000000, + BPF_SIZE(insn->code) == BPF_W ? 0x00fa : 0x00ea, + REG_W0, src_reg, dst_reg, off); jit->seen |= SEEN_MEM; break; /* diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile deleted file mode 100644 index 36261f9d360b..000000000000 --- a/arch/s390/oprofile/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_OPROFILE) += oprofile.o - -DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ - oprof.o cpu_buffer.o buffer_sync.o \ - event_buffer.o oprofile_files.o \ - oprofilefs.o oprofile_stats.o \ - timer_int.o ) - -oprofile-y := $(DRIVER_OBJS) init.o diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c deleted file mode 100644 index 7441857df51b..000000000000 --- a/arch/s390/oprofile/init.c +++ /dev/null @@ -1,37 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * S390 Version - * Copyright IBM Corp. 2002, 2011 - * Author(s): Thomas Spatzier (tspat@de.ibm.com) - * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com) - * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com) - * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) - * - * @remark Copyright 2002-2011 OProfile authors - */ - -#include <linux/oprofile.h> -#include <linux/init.h> -#include <asm/processor.h> -#include <asm/unwind.h> - -static void s390_backtrace(struct pt_regs *regs, unsigned int depth) -{ - struct unwind_state state; - - unwind_for_each_frame(&state, current, regs, 0) { - if (depth-- == 0) - break; - oprofile_add_trace(state.ip); - } -} - -int __init oprofile_arch_init(struct oprofile_operations *ops) -{ - ops->backtrace = s390_backtrace; - return 0; -} - -void oprofile_arch_exit(void) -{ -} diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 41df8fcfddde..600881d894dd 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -695,43 +695,68 @@ void zpci_remove_device(struct zpci_dev *zdev) } } -int zpci_create_device(struct zpci_dev *zdev) +/** + * zpci_create_device() - Create a new zpci_dev and add it to the zbus + * @fid: Function ID of the device to be created + * @fh: Current Function Handle of the device to be created + * @state: Initial state after creation either Standby or Configured + * + * Creates a new zpci device and adds it to its, possibly newly created, zbus + * as well as zpci_list. + * + * Returns: 0 on success, an error value otherwise + */ +int zpci_create_device(u32 fid, u32 fh, enum zpci_state state) { + struct zpci_dev *zdev; int rc; - kref_init(&zdev->kref); + zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, state); + zdev = kzalloc(sizeof(*zdev), GFP_KERNEL); + if (!zdev) + return -ENOMEM; - spin_lock(&zpci_list_lock); - list_add_tail(&zdev->entry, &zpci_list); - spin_unlock(&zpci_list_lock); + /* FID and Function Handle are the static/dynamic identifiers */ + zdev->fid = fid; + zdev->fh = fh; - rc = zpci_init_iommu(zdev); + /* Query function properties and update zdev */ + rc = clp_query_pci_fn(zdev); if (rc) - goto out; + goto error; + zdev->state = state; + kref_init(&zdev->kref); mutex_init(&zdev->lock); + + rc = zpci_init_iommu(zdev); + if (rc) + goto error; + if (zdev->state == ZPCI_FN_STATE_CONFIGURED) { rc = zpci_enable_device(zdev); if (rc) - goto out_destroy_iommu; + goto error_destroy_iommu; } rc = zpci_bus_device_register(zdev, &pci_root_ops); if (rc) - goto out_disable; + goto error_disable; + + spin_lock(&zpci_list_lock); + list_add_tail(&zdev->entry, &zpci_list); + spin_unlock(&zpci_list_lock); return 0; -out_disable: +error_disable: if (zdev->state == ZPCI_FN_STATE_ONLINE) zpci_disable_device(zdev); - -out_destroy_iommu: +error_destroy_iommu: zpci_destroy_iommu(zdev); -out: - spin_lock(&zpci_list_lock); - list_del(&zdev->entry); - spin_unlock(&zpci_list_lock); +error: + zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc); + kfree(zdev); return rc; } diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 153720d21ae7..d3331596ddbe 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -181,7 +181,7 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev, return 0; } -static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) +int clp_query_pci_fn(struct zpci_dev *zdev) { struct clp_req_rsp_query_pci *rrb; int rc; @@ -194,7 +194,7 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) rrb->request.hdr.len = sizeof(rrb->request); rrb->request.hdr.cmd = CLP_QUERY_PCI_FN; rrb->response.hdr.len = sizeof(rrb->response); - rrb->request.fh = fh; + rrb->request.fh = zdev->fh; rc = clp_req(rrb, CLP_LPS_PCI); if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { @@ -212,40 +212,6 @@ out: return rc; } -int clp_add_pci_device(u32 fid, u32 fh, int configured) -{ - struct zpci_dev *zdev; - int rc = -ENOMEM; - - zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, configured); - zdev = kzalloc(sizeof(*zdev), GFP_KERNEL); - if (!zdev) - goto error; - - zdev->fh = fh; - zdev->fid = fid; - - /* Query function properties and update zdev */ - rc = clp_query_pci_fn(zdev, fh); - if (rc) - goto error; - - if (configured) - zdev->state = ZPCI_FN_STATE_CONFIGURED; - else - zdev->state = ZPCI_FN_STATE_STANDBY; - - rc = zpci_create_device(zdev); - if (rc) - goto error; - return 0; - -error: - zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc); - kfree(zdev); - return rc; -} - static int clp_refresh_fh(u32 fid); /* * Enable/Disable a given PCI function and update its function handle if @@ -408,7 +374,7 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) zdev = get_zdev_by_fid(entry->fid); if (!zdev) - clp_add_pci_device(entry->fid, entry->fh, entry->config_state); + zpci_create_device(entry->fid, entry->fh, entry->config_state); } int clp_scan_pci_devices(void) diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 9a6bae503fe6..b4162da4e8a2 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -80,7 +80,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) enum zpci_state state; int ret; - if (zdev && zdev->zbus && zdev->zbus->bus) + if (zdev && zdev->zbus->bus) pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); zpci_err("avail CCDF:\n"); @@ -89,7 +89,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) switch (ccdf->pec) { case 0x0301: /* Reserved|Standby -> Configured */ if (!zdev) { - ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 1); + zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED); break; } /* the configuration request may be stale */ @@ -116,7 +116,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) break; case 0x0302: /* Reserved -> Standby */ if (!zdev) { - clp_add_pci_device(ccdf->fid, ccdf->fh, 0); + zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY); break; } zdev->fh = ccdf->fh; diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 18f2d10c3176..474617b88648 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -170,7 +170,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, if (!(vma->vm_flags & VM_WRITE)) goto out_unlock_mmap; - ret = follow_pte(vma->vm_mm, mmio_addr, NULL, &ptep, NULL, &ptl); + ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl); if (ret) goto out_unlock_mmap; @@ -311,7 +311,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, if (!(vma->vm_flags & VM_WRITE)) goto out_unlock_mmap; - ret = follow_pte(vma->vm_mm, mmio_addr, NULL, &ptep, NULL, &ptl); + ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl); if (ret) goto out_unlock_mmap; diff --git a/arch/s390/tools/opcodes.txt b/arch/s390/tools/opcodes.txt index 46d8ed96cf06..0e207c46e8da 100644 --- a/arch/s390/tools/opcodes.txt +++ b/arch/s390/tools/opcodes.txt @@ -597,7 +597,7 @@ b9b3 cu42 RRE_RR b9bd trtre RRF_U0RR b9be srstu RRE_RR b9bf trte RRF_U0RR -b9c0 selhhhr RRF_RURR +b9c0 selfhr RRF_RURR b9c8 ahhhr RRF_R0RR2 b9c9 shhhr RRF_R0RR2 b9ca alhhhr RRF_R0RR2 |