103 files changed, 920 insertions, 1327 deletions
diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
index cd29083e16ec..48ffb38f699e 100644
--- a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
+++ b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
@@ -7,7 +7,15 @@ connected to the IPMMU through a port called micro-TLB.
 
 Required Properties:
 
-  - compatible: Must contain "renesas,ipmmu-vmsa".
+  - compatible: Must contain SoC-specific and generic entries from below.
+
+    - "renesas,ipmmu-r8a73a4" for the R8A73A4 (R-Mobile APE6) IPMMU.
+    - "renesas,ipmmu-r8a7790" for the R8A7790 (R-Car H2) IPMMU.
+    - "renesas,ipmmu-r8a7791" for the R8A7791 (R-Car M2-W) IPMMU.
+    - "renesas,ipmmu-r8a7793" for the R8A7793 (R-Car M2-N) IPMMU.
+    - "renesas,ipmmu-r8a7794" for the R8A7794 (R-Car E2) IPMMU.
+    - "renesas,ipmmu-vmsa" for generic R-Car Gen2 VMSA-compatible IPMMU.
+
   - reg: Base address and size of the IPMMU registers.
   - interrupts: Specifiers for the MMU fault interrupts. For instances that
     support secure mode two interrupts must be specified, for non-secure and
@@ -27,7 +35,7 @@ node with the following property:
 Example: R8A7791 IPMMU-MX and VSP1-D0 bus master
 
 	ipmmu_mx: mmu@fe951000 {
-		compatible = "renasas,ipmmu-vmsa";
+		compatible = "renasas,ipmmu-r8a7791", "renasas,ipmmu-vmsa";
 		reg = <0 0xfe951000 0 0x1000>;
 		interrupts = <0 222 IRQ_TYPE_LEVEL_HIGH>,
 			     <0 221 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/Makefile b/Makefile
index 9d94adeceab6..70dea02f1346 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 4
 SUBLEVEL = 0
-EXTRAVERSION = -rc8
+EXTRAVERSION =
 NAME = Blurry Fish Butt
 
 # *DOCUMENTATION*
diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
index 314f59c12162..d0c743853318 100644
--- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
+++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
@@ -25,9 +25,9 @@
 		cache-sets = <512>;
 		cache-line-size = <32>;
 		/* At full speed latency must be >=2 */
-		arm,tag-latency = <2>;
-		arm,data-latency = <2 2>;
-		arm,dirty-latency = <2>;
+		arm,tag-latency = <8>;
+		arm,data-latency = <8 8>;
+		arm,dirty-latency = <8>;
 	};
 
 	mtu0: mtu@101e2000 {
diff --git a/arch/arm/boot/dts/versatile-ab.dts b/arch/arm/boot/dts/versatile-ab.dts
index 01f40197ea13..3279bf1a17a1 100644
--- a/arch/arm/boot/dts/versatile-ab.dts
+++ b/arch/arm/boot/dts/versatile-ab.dts
@@ -110,7 +110,11 @@
 			interrupt-parent = <&vic>;
 			interrupts = <31>; /* Cascaded to vic */
 			clear-mask = <0xffffffff>;
-			valid-mask = <0xffc203f8>;
+			/*
+			 * Valid interrupt lines mask according to
+			 * table 4-36 page 4-50 of ARM DUI 0225D
+			 */
+			valid-mask = <0x0760031b>;
 		};
 
 		dma@10130000 {
@@ -266,8 +270,8 @@
 			};
 			mmc@5000 {
 				compatible = "arm,pl180", "arm,primecell";
-				reg = < 0x5000 0x1000>;
-				interrupts-extended = <&vic 22 &sic 2>;
+				reg = <0x5000 0x1000>;
+				interrupts-extended = <&vic 22 &sic 1>;
 				clocks = <&xtal24mhz>, <&pclk>;
 				clock-names = "mclk", "apb_pclk";
 			};
diff --git a/arch/arm/boot/dts/versatile-pb.dts b/arch/arm/boot/dts/versatile-pb.dts
index b83137f66034..33a8eb28374e 100644
--- a/arch/arm/boot/dts/versatile-pb.dts
+++ b/arch/arm/boot/dts/versatile-pb.dts
@@ -5,6 +5,16 @@
 	compatible = "arm,versatile-pb";
 
 	amba {
+		/* The Versatile PB is using more SIC IRQ lines than the AB */
+		sic: intc@10003000 {
+			clear-mask = <0xffffffff>;
+			/*
+			 * Valid interrupt lines mask according to
+			 * figure 3-30 page 3-74 of ARM DUI 0224B
+			 */
+			valid-mask = <0x7fe003ff>;
+		};
+
 		gpio2: gpio@101e6000 {
 			compatible = "arm,pl061", "arm,primecell";
 			reg = <0x101e6000 0x1000>;
@@ -67,6 +77,13 @@
 		};
 
 		fpga {
+			mmc@5000 {
+				/*
+				 * Overrides the interrupt assignment from
+				 * the Versatile AB board file.
+				 */
+				interrupts-extended = <&sic 22 &sic 23>;
+			};
 			uart@9000 {
 				compatible = "arm,pl011", "arm,primecell";
 				reg = <0x9000 0x1000>;
@@ -86,7 +103,8 @@
 			mmc@b000 {
 				compatible = "arm,pl180", "arm,primecell";
 				reg = <0xb000 0x1000>;
-				interrupts-extended = <&vic 23 &sic 2>;
+				interrupt-parent = <&sic>;
+				interrupts = <1>, <2>;
 				clocks = <&xtal24mhz>, <&pclk>;
 				clock-names = "mclk", "apb_pclk";
 			};
diff --git a/arch/arm/boot/dts/wm8650.dtsi b/arch/arm/boot/dts/wm8650.dtsi
index b1c59a766a13..e12213d16693 100644
--- a/arch/arm/boot/dts/wm8650.dtsi
+++ b/arch/arm/boot/dts/wm8650.dtsi
@@ -187,6 +187,15 @@
 			interrupts = <43>;
 		};
 
+		sdhc@d800a000 {
+			compatible = "wm,wm8505-sdhc";
+			reg = <0xd800a000 0x400>;
+			interrupts = <20>, <21>;
+			clocks = <&clksdhc>;
+			bus-width = <4>;
+			sdon-inverted;
+		};
+
 		fb: fb@d8050800 {
 			compatible = "wm,wm8505-fb";
 			reg = <0xd8050800 0x200>;
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 69a22fdb52a5..cd7b198fc79e 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -366,6 +366,7 @@ CONFIG_BATTERY_MAX17042=m
 CONFIG_CHARGER_MAX14577=m
 CONFIG_CHARGER_MAX77693=m
 CONFIG_CHARGER_TPS65090=y
+CONFIG_AXP20X_POWER=m
 CONFIG_POWER_RESET_AS3722=y
 CONFIG_POWER_RESET_GPIO=y
 CONFIG_POWER_RESET_GPIO_RESTART=y
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index 3c36e16fcacf..b503a89441bf 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -84,6 +84,7 @@ CONFIG_SPI_SUN4I=y
 CONFIG_SPI_SUN6I=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_POWER_SUPPLY=y
+CONFIG_AXP20X_POWER=y
 CONFIG_THERMAL=y
 CONFIG_CPU_THERMAL=y
 CONFIG_WATCHDOG=y
diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c
index 17a6f752a436..7b76ce01c21d 100644
--- a/arch/arm/mach-omap2/gpmc-onenand.c
+++ b/arch/arm/mach-omap2/gpmc-onenand.c
@@ -149,8 +149,8 @@ static int omap2_onenand_get_freq(struct omap_onenand_platform_data *cfg,
 		freq = 104;
 		break;
 	default:
-		freq = 54;
-		break;
+		pr_err("onenand rate not detected, bad GPMC async timings?\n");
+		freq = 0;
 	}
 
 	return freq;
@@ -271,6 +271,11 @@ static int omap2_onenand_setup_async(void __iomem *onenand_base)
 	struct gpmc_timings t;
 	int ret;
 
+	/*
+	 * Note that we need to keep sync_write set for the call to
+	 * omap2_onenand_set_async_mode() to work to detect the onenand
+	 * supported clock rate for the sync timings.
+	 */
 	if (gpmc_onenand_data->of_node) {
 		gpmc_read_settings_dt(gpmc_onenand_data->of_node,
 				      &onenand_async);
@@ -281,12 +286,9 @@ static int omap2_onenand_setup_async(void __iomem *onenand_base)
 			else
 				gpmc_onenand_data->flags |= ONENAND_SYNC_READ;
 			onenand_async.sync_read = false;
-			onenand_async.sync_write = false;
 		}
 	}
 
-	omap2_onenand_set_async_mode(onenand_base);
-
 	omap2_onenand_calc_async_timings(&t);
 
 	ret = gpmc_cs_program_settings(gpmc_onenand_data->cs, &onenand_async);
@@ -310,6 +312,8 @@ static int omap2_onenand_setup_sync(void __iomem *onenand_base, int *freq_ptr)
 	if (!freq) {
 		/* Very first call freq is not known */
 		freq = omap2_onenand_get_freq(gpmc_onenand_data, onenand_base);
+		if (!freq)
+			return -ENODEV;
 		set_onenand_cfg(onenand_base);
 	}
 
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 591f9db3bf40..93d0b6d0b63e 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -187,19 +187,6 @@ static inline int mem_words_used(struct jit_ctx *ctx)
 	return fls(ctx->seen & SEEN_MEM);
 }
 
-static inline bool is_load_to_a(u16 inst)
-{
-	switch (inst) {
-	case BPF_LD | BPF_W | BPF_LEN:
-	case BPF_LD | BPF_W | BPF_ABS:
-	case BPF_LD | BPF_H | BPF_ABS:
-	case BPF_LD | BPF_B | BPF_ABS:
-		return true;
-	default:
-		return false;
-	}
-}
-
 static void jit_fill_hole(void *area, unsigned int size)
 {
 	u32 *ptr;
@@ -211,7 +198,6 @@ static void jit_fill_hole(void *area, unsigned int size)
 static void build_prologue(struct jit_ctx *ctx)
 {
 	u16 reg_set = saved_regs(ctx);
-	u16 first_inst = ctx->skf->insns[0].code;
 	u16 off;
 
 #ifdef CONFIG_FRAME_POINTER
@@ -241,7 +227,7 @@ static void build_prologue(struct jit_ctx *ctx)
 		emit(ARM_MOV_I(r_X, 0), ctx);
 
 	/* do not leak kernel data to userspace */
-	if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst)))
+	if (bpf_needs_clear_a(&ctx->skf->insns[0]))
 		emit(ARM_MOV_I(r_A, 0), ctx);
 
 	/* stack space for the BPF_MEM words */
@@ -770,7 +756,8 @@ load_ind:
 		case BPF_ALU | BPF_RSH | BPF_K:
 			if (unlikely(k > 31))
 				return -1;
-			emit(ARM_LSR_I(r_A, r_A, k), ctx);
+			if (k)
+				emit(ARM_LSR_I(r_A, r_A, k), ctx);
 			break;
 		case BPF_ALU | BPF_RSH | BPF_X:
 			update_on_xread(ctx);
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 77cb27309db2..1a8c96035716 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -521,19 +521,6 @@ static inline u16 align_sp(unsigned int num)
 	return num;
 }
 
-static bool is_load_to_a(u16 inst)
-{
-	switch (inst) {
-	case BPF_LD | BPF_W | BPF_LEN:
-	case BPF_LD | BPF_W | BPF_ABS:
-	case BPF_LD | BPF_H | BPF_ABS:
-	case BPF_LD | BPF_B | BPF_ABS:
-		return true;
-	default:
-		return false;
-	}
-}
-
 static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset)
 {
 	int i = 0, real_off = 0;
@@ -614,7 +601,6 @@ static unsigned int get_stack_depth(struct jit_ctx *ctx)
 
 static void build_prologue(struct jit_ctx *ctx)
 {
-	u16 first_inst = ctx->skf->insns[0].code;
 	int sp_off;
 
 	/* Calculate the total offset for the stack pointer */
@@ -641,7 +627,7 @@ static void build_prologue(struct jit_ctx *ctx)
 		emit_jit_reg_move(r_X, r_zero, ctx);
 
 	/* Do not leak kernel data to userspace */
-	if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst)))
+	if (bpf_needs_clear_a(&ctx->skf->insns[0]))
 		emit_jit_reg_move(r_A, r_zero, ctx);
 }
 
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 04782164ee67..2d66a8446198 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -78,18 +78,9 @@ static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image,
 		PPC_LI(r_X, 0);
 	}
 
-	switch (filter[0].code) {
-	case BPF_RET | BPF_K:
-	case BPF_LD | BPF_W | BPF_LEN:
-	case BPF_LD | BPF_W | BPF_ABS:
-	case BPF_LD | BPF_H | BPF_ABS:
-	case BPF_LD | BPF_B | BPF_ABS:
-		/* first instruction sets A register (or is RET 'constant') */
-		break;
-	default:
-		/* make sure we dont leak kernel information to user */
+	/* make sure we dont leak kernel information to user */
+	if (bpf_needs_clear_a(&filter[0]))
 		PPC_LI(r_A, 0);
-	}
 }
 
 static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index 22564f5f2364..3e6e05a7c4c2 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -420,22 +420,9 @@ void bpf_jit_compile(struct bpf_prog *fp)
 		}
 		emit_reg_move(O7, r_saved_O7);
 
-		switch (filter[0].code) {
-		case BPF_RET | BPF_K:
-		case BPF_LD | BPF_W | BPF_LEN:
-		case BPF_LD | BPF_W | BPF_ABS:
-		case BPF_LD | BPF_H | BPF_ABS:
-		case BPF_LD | BPF_B | BPF_ABS:
-			/* The first instruction sets the A register (or is
-			 * a "RET 'constant'")
-			 */
-			break;
-		default:
-			/* Make sure we dont leak kernel information to the
-			 * user.
-			 */
+		/* Make sure we dont leak kernel information to the user. */
+		if (bpf_needs_clear_a(&filter[0]))
 			emit_clear(r_A); /* A = 0 */
-		}
 
 		for (i = 0; i < flen; i++) {
 			unsigned int K = filter[i].k;
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 106c21bd7f44..8ec7a4599c08 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -176,8 +176,6 @@ config NR_CPUS
 	  smaller kernel memory footprint results from using a smaller
 	  value on chips with fewer tiles.
 
-if TILEGX
-
 choice
 	prompt "Kernel page size"
 	default PAGE_SIZE_64KB
@@ -188,8 +186,11 @@ choice
 	  connections, etc., it may be better to select 16KB, which uses
 	  memory more efficiently at some cost in TLB performance.
 
-	  Note that this option is TILE-Gx specific; currently
-	  TILEPro page size is set by rebuilding the hypervisor.
+	  Note that for TILEPro, you must also rebuild the hypervisor
+	  with a matching page size.
+
+config PAGE_SIZE_4KB
+	bool "4KB" if TILEPRO
 
 config PAGE_SIZE_16KB
 	bool "16KB"
@@ -199,8 +200,6 @@ config PAGE_SIZE_64KB
 
 endchoice
 
-endif
-
 source "kernel/Kconfig.hz"
 
 config KEXEC
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index a213a8d84a95..8eca6a0e1762 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -20,15 +20,17 @@
 #include <arch/chip.h>
 
 /* PAGE_SHIFT and HPAGE_SHIFT determine the page sizes. */
-#if defined(CONFIG_PAGE_SIZE_16KB)
+#if defined(CONFIG_PAGE_SIZE_4KB)  /* tilepro only */
+#define PAGE_SHIFT	12
+#define CTX_PAGE_FLAG	HV_CTX_PG_SM_4K
+#elif defined(CONFIG_PAGE_SIZE_16KB)
 #define PAGE_SHIFT	14
 #define CTX_PAGE_FLAG	HV_CTX_PG_SM_16K
 #elif defined(CONFIG_PAGE_SIZE_64KB)
 #define PAGE_SHIFT	16
 #define CTX_PAGE_FLAG	HV_CTX_PG_SM_64K
 #else
-#define PAGE_SHIFT	HV_LOG2_DEFAULT_PAGE_SIZE_SMALL
-#define CTX_PAGE_FLAG	0
+#error Page size not specified in Kconfig
 #endif
 #define HPAGE_SHIFT	HV_LOG2_DEFAULT_PAGE_SIZE_LARGE
 
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index a89fdbc1f0be..03663740c866 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -421,7 +421,7 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
 	regs->ip = landing_pad;
 
 	/*
-	 * Fetch ECX from where the vDSO stashed it.
+	 * Fetch EBP from where the vDSO stashed it.
 	 *
 	 * WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
 	 */
@@ -432,10 +432,10 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
 		 * Micro-optimization: the pointer we're following is explicitly
 		 * 32 bits, so it can't be out of range.
 		 */
-		__get_user(*(u32 *)&regs->cx,
+		__get_user(*(u32 *)&regs->bp,
 			    (u32 __user __force *)(unsigned long)(u32)regs->sp)
 #else
-		get_user(*(u32 *)&regs->cx,
+		get_user(*(u32 *)&regs->bp,
 			 (u32 __user __force *)(unsigned long)(u32)regs->sp)
 #endif
 		) {
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 3eb572ed3d7a..f3b6d54e0042 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -292,7 +292,7 @@ ENTRY(entry_SYSENTER_32)
 	movl	TSS_sysenter_sp0(%esp), %esp
 sysenter_past_esp:
 	pushl	$__USER_DS		/* pt_regs->ss */
-	pushl	%ecx			/* pt_regs->cx */
+	pushl	%ebp			/* pt_regs->sp (stashed in bp) */
 	pushfl				/* pt_regs->flags (except IF = 0) */
 	orl	$X86_EFLAGS_IF, (%esp)	/* Fix IF */
 	pushl	$__USER_CS		/* pt_regs->cs */
@@ -308,8 +308,9 @@ sysenter_past_esp:
 
 	movl	%esp, %eax
 	call	do_fast_syscall_32
-	testl	%eax, %eax
-	jz	.Lsyscall_32_done
+	/* XEN PV guests always use IRET path */
+	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
+		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
 
 /* Opportunistic SYSEXIT */
 	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index c3201830a85e..6a1ae3751e82 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -63,7 +63,7 @@ ENTRY(entry_SYSENTER_compat)
 
 	/* Construct struct pt_regs on stack */
 	pushq	$__USER32_DS		/* pt_regs->ss */
-	pushq	%rcx			/* pt_regs->sp */
+	pushq	%rbp			/* pt_regs->sp (stashed in bp) */
 
 	/*
 	 * Push flags.  This is nasty.  First, interrupts are currently
@@ -82,14 +82,14 @@ ENTRY(entry_SYSENTER_compat)
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
 	pushq	%rdx			/* pt_regs->dx */
-	pushq	%rcx			/* pt_regs->cx (will be overwritten) */
+	pushq	%rcx			/* pt_regs->cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
 	pushq   %r8                     /* pt_regs->r8  = 0 */
 	pushq   %r8                     /* pt_regs->r9  = 0 */
 	pushq   %r8                     /* pt_regs->r10 = 0 */
 	pushq   %r8                     /* pt_regs->r11 = 0 */
 	pushq   %rbx                    /* pt_regs->rbx */
-	pushq   %rbp                    /* pt_regs->rbp */
+	pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
 	pushq   %r8                     /* pt_regs->r12 = 0 */
 	pushq   %r8                     /* pt_regs->r13 = 0 */
 	pushq   %r8                     /* pt_regs->r14 = 0 */
@@ -121,8 +121,9 @@ sysenter_flags_fixed:
 
 	movq	%rsp, %rdi
 	call	do_fast_syscall_32
-	testl	%eax, %eax
-	jz	.Lsyscall_32_done
+	/* XEN PV guests always use IRET path */
+	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
+		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
 	jmp	sysret32_from_system_call
 
 sysenter_fix_flags:
@@ -178,7 +179,7 @@ ENTRY(entry_SYSCALL_compat)
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
 	pushq	%rdx			/* pt_regs->dx */
-	pushq	%rcx			/* pt_regs->cx (will be overwritten) */
+	pushq	%rbp			/* pt_regs->cx (stashed in bp) */
 	pushq	$-ENOSYS		/* pt_regs->ax */
 	xorq    %r8,%r8
 	pushq   %r8                     /* pt_regs->r8  = 0 */
@@ -186,7 +187,7 @@ ENTRY(entry_SYSCALL_compat)
 	pushq   %r8                     /* pt_regs->r10 = 0 */
 	pushq   %r8                     /* pt_regs->r11 = 0 */
 	pushq   %rbx                    /* pt_regs->rbx */
-	pushq   %rbp                    /* pt_regs->rbp */
+	pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
 	pushq   %r8                     /* pt_regs->r12 = 0 */
 	pushq   %r8                     /* pt_regs->r13 = 0 */
 	pushq   %r8                     /* pt_regs->r14 = 0 */
@@ -200,8 +201,9 @@ ENTRY(entry_SYSCALL_compat)
 
 	movq	%rsp, %rdi
 	call	do_fast_syscall_32
-	testl	%eax, %eax
-	jz	.Lsyscall_32_done
+	/* XEN PV guests always use IRET path */
+	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
+		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
 
 	/* Opportunistic SYSRET */
 sysret32_from_system_call:
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
index 93bd8452383f..3a1d9297074b 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -1,5 +1,5 @@
 /*
- * Code for the vDSO.  This version uses the old int $0x80 method.
+ * AT_SYSINFO entry point
 */
 
 #include <asm/dwarf2.h>
@@ -21,35 +21,67 @@ __kernel_vsyscall:
 	/*
 	 * Reshuffle regs so that all of any of the entry instructions
 	 * will preserve enough state.
+	 *
+	 * A really nice entry sequence would be:
+	 *  pushl %edx
+	 *  pushl %ecx
+	 *  movl  %esp, %ecx
+	 *
+	 * Unfortunately, naughty Android versions between July and December
+	 * 2015 actually hardcode the traditional Linux SYSENTER entry
+	 * sequence.  That is severely broken for a number of reasons (ask
+	 * anyone with an AMD CPU, for example).  Nonetheless, we try to keep
+	 * it working approximately as well as it ever worked.
+	 *
+	 * This link may eludicate some of the history:
+	 *   https://android-review.googlesource.com/#/q/Iac3295376d61ef83e713ac9b528f3b50aa780cd7
+	 * personally, I find it hard to understand what's going on there.
+	 *
+	 * Note to future user developers: DO NOT USE SYSENTER IN YOUR CODE.
+	 * Execute an indirect call to the address in the AT_SYSINFO auxv
+	 * entry.  That is the ONLY correct way to make a fast 32-bit system
+	 * call on Linux.  (Open-coding int $0x80 is also fine, but it's
+	 * slow.)
 	 */
+	pushl	%ecx
+	CFI_ADJUST_CFA_OFFSET	4
+	CFI_REL_OFFSET		ecx, 0
 	pushl	%edx
 	CFI_ADJUST_CFA_OFFSET	4
 	CFI_REL_OFFSET		edx, 0
-	pushl	%ecx
+	pushl	%ebp
 	CFI_ADJUST_CFA_OFFSET	4
-	CFI_REL_OFFSET		ecx, 0
-	movl	%esp, %ecx
+	CFI_REL_OFFSET		ebp, 0
+
+	#define SYSENTER_SEQUENCE	"movl %esp, %ebp; sysenter"
+	#define SYSCALL_SEQUENCE	"movl %ecx, %ebp; syscall"
 
 #ifdef CONFIG_X86_64
 	/* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */
-	ALTERNATIVE_2 "", "sysenter", X86_FEATURE_SYSENTER32, \
-	                  "syscall",  X86_FEATURE_SYSCALL32
+	ALTERNATIVE_2 "", SYSENTER_SEQUENCE, X86_FEATURE_SYSENTER32, \
+	                  SYSCALL_SEQUENCE,  X86_FEATURE_SYSCALL32
 #else
-	ALTERNATIVE "", "sysenter", X86_FEATURE_SEP
+	ALTERNATIVE "", SYSENTER_SEQUENCE, X86_FEATURE_SEP
 #endif
 
 	/* Enter using int $0x80 */
-	movl	(%esp), %ecx
 	int	$0x80
 GLOBAL(int80_landing_pad)
 
-	/* Restore ECX and EDX in case they were clobbered. */
-	popl	%ecx
-	CFI_RESTORE		ecx
+	/*
+	 * Restore EDX and ECX in case they were clobbered.  EBP is not
+	 * clobbered (the kernel restores it), but it's cleaner and
+	 * probably faster to pop it than to adjust ESP using addl.
+	 */
+	popl	%ebp
+	CFI_RESTORE		ebp
 	CFI_ADJUST_CFA_OFFSET	-4
 	popl	%edx
 	CFI_RESTORE		edx
 	CFI_ADJUST_CFA_OFFSET	-4
+	popl	%ecx
+	CFI_RESTORE		ecx
+	CFI_ADJUST_CFA_OFFSET	-4
 	ret
 	CFI_ENDPROC
 
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index e4f8010f22e0..f7ba9fbf12ee 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -216,6 +216,7 @@
 #define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */
 #define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */
 #define X86_FEATURE_VMMCALL     ( 8*32+15) /* Prefer vmmcall to vmcall */
+#define X86_FEATURE_XENPV       ( 8*32+16) /* "" Xen paravirtual guest */
 
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 10d0596433f8..c759b3cca663 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -19,6 +19,12 @@ static inline int paravirt_enabled(void)
 	return pv_info.paravirt_enabled;
 }
 
+static inline int paravirt_has_feature(unsigned int feature)
+{
+	WARN_ON_ONCE(!pv_info.paravirt_enabled);
+	return (pv_info.features & feature);
+}
+
 static inline void load_sp0(struct tss_struct *tss,
 			     struct thread_struct *thread)
 {
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 31247b5bff7c..3d44191185f8 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -70,9 +70,14 @@ struct pv_info {
 #endif
 
 	int paravirt_enabled;
+	unsigned int features;	  /* valid only if paravirt_enabled is set */
 	const char *name;
 };
 
+#define paravirt_has(x) paravirt_has_feature(PV_SUPPORTED_##x)
+/* Supported features */
+#define PV_SUPPORTED_RTC        (1<<0)
+
 struct pv_init_ops {
 	/*
 	 * Patch may replace one of the defined code sequences with
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 67522256c7ff..2d5a50cb61a2 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -472,6 +472,7 @@ static inline unsigned long current_top_of_stack(void)
 #else
 #define __cpuid			native_cpuid
 #define paravirt_enabled()	0
+#define paravirt_has(x) 	0
 
 static inline void load_sp0(struct tss_struct *tss,
 			    struct thread_struct *thread)
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 38dd5efdd04c..2bd2292a316d 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -193,20 +193,17 @@ static int __init numachip_system_init(void)
 	case 1:
 		init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
 		numachip_apic_icr_write = numachip1_apic_icr_write;
-		x86_init.pci.arch_init = pci_numachip_init;
 		break;
 	case 2:
 		init_extra_mapping_uc(NUMACHIP2_LCSR_BASE, NUMACHIP2_LCSR_SIZE);
 		numachip_apic_icr_write = numachip2_apic_icr_write;
-
-		/* Use MCFG config cycles rather than locked CF8 cycles */
-		raw_pci_ops = &pci_mmcfg;
 		break;
 	default:
 		return 0;
 	}
 
 	x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
+	x86_init.pci.arch_init = pci_numachip_init;
 
 	return 0;
 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index c5b0d562dbf5..7e8a736d09db 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -999,6 +999,17 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	int flags = MF_ACTION_REQUIRED;
 	int lmce = 0;
 
+	/* If this CPU is offline, just bail out. */
+	if (cpu_is_offline(smp_processor_id())) {
+		u64 mcgstatus;
+
+		mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+		if (mcgstatus & MCG_STATUS_RIPV) {
+			mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
+			return;
+		}
+	}
+
 	ist_enter(regs);
 
 	this_cpu_inc(mce_exception_count);
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index cd9685235df9..4af8d063fb36 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -200,6 +200,9 @@ static __init int add_rtc_cmos(void)
 	}
 #endif
 
+	if (paravirt_enabled() && !paravirt_has(RTC))
+		return -ENODEV;
+
 	platform_device_register(&rtc_device);
 	dev_info(&rtc_device.dev,
 		 "registered platform RTC device (no PNP device found)\n");
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 08116ff227cc..b0ea42b78ccd 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -420,6 +420,7 @@ void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_s
 	u8 saved_mode;
 	if (hpet_legacy_start) {
 		/* save existing mode for later reenablement */
+		WARN_ON(channel != 0);
 		saved_mode = kvm->arch.vpit->pit_state.channels[0].mode;
 		kvm->arch.vpit->pit_state.channels[0].mode = 0xff; /* disable timer */
 		pit_load_count(kvm, channel, val);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7ffc224bbe41..97592e190413 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3606,7 +3606,8 @@ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
 	       sizeof(kvm->arch.vpit->pit_state.channels));
 	kvm->arch.vpit->pit_state.flags = ps->flags;
 	for (i = 0; i < 3; i++)
-		kvm_pit_load_count(kvm, i, kvm->arch.vpit->pit_state.channels[i].count, start);
+		kvm_pit_load_count(kvm, i, kvm->arch.vpit->pit_state.channels[i].count,
+				   start && i == 0);
 	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
 	return 0;
 }
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index a0d09f6c6533..a43b2eafc466 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1414,6 +1414,7 @@ __init void lguest_init(void)
 	pv_info.kernel_rpl = 1;
 	/* Everyone except Xen runs with this set. */
 	pv_info.shared_kernel_pmd = 1;
+	pv_info.features = 0;
 
 	/*
 	 * We set up all the lguest overrides for sensitive operations.  These
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 5774800ff583..b7de78bdc09c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1192,7 +1192,7 @@ static const struct pv_info xen_info __initconst = {
 #ifdef CONFIG_X86_64
 	.extra_user_64bit_cs = FLAT_USER_CS64,
 #endif
-
+	.features = 0,
 	.name = "Xen",
 };
 
@@ -1535,6 +1535,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
 
 	/* Install Xen paravirt ops */
 	pv_info = xen_info;
+	if (xen_initial_domain())
+		pv_info.features |= PV_SUPPORTED_RTC;
 	pv_init_ops = xen_init_ops;
 	pv_apic_ops = xen_apic_ops;
 	if (!xen_pvh_domain()) {
@@ -1886,8 +1888,10 @@ EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
 
 static void xen_set_cpu_features(struct cpuinfo_x86 *c)
 {
-	if (xen_pv_domain())
+	if (xen_pv_domain()) {
 		clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+		set_cpu_cap(c, X86_FEATURE_XENPV);
+	}
 }
 
 const struct hypervisor_x86 x86_hyper_xen = {
diff --git a/block/blk-merge.c b/block/blk-merge.c
index e73846a3d08a..e01405a3e8b3 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -81,7 +81,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 	struct bio *new = NULL;
 
 	bio_for_each_segment(bv, bio, iter) {
-		if (sectors + (bv.bv_len >> 9) > blk_max_size_offset(q, bio->bi_iter.bi_sector))
+		if (sectors + (bv.bv_len >> 9) > queue_max_sectors(q))
 			goto split;
 
 		/*
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index f8c0b8dbeb75..88bc8e6b2a54 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -53,7 +53,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 	struct dmaengine_unmap_data *unmap = NULL;
 
 	if (device)
-		unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOIO);
+		unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT);
 
 	if (unmap && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
 		unsigned long dma_prep_flags = 0;
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
index 5d355e0c2633..c0748bbd4c08 100644
--- a/crypto/async_tx/async_pq.c
+++ b/crypto/async_tx/async_pq.c
@@ -188,7 +188,7 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
 	BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
 
 	if (device)
-		unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
+		unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOWAIT);
 
 	/* XORing P/Q is only implemented in software */
 	if (unmap && !(submit->flags & ASYNC_TX_PQ_XOR_DST) &&
@@ -307,7 +307,7 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
 	BUG_ON(disks < 4);
 
 	if (device)
-		unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
+		unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOWAIT);
 
 	if (unmap && disks <= dma_maxpq(device, 0) &&
 	    is_dma_pq_aligned(device, offset, 0, len)) {
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
index 934a84981495..8fab6275ea1f 100644
--- a/crypto/async_tx/async_raid6_recov.c
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -41,7 +41,7 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
 	u8 *a, *b, *c;
 
 	if (dma)
-		unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO);
+		unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOWAIT);
 
 	if (unmap) {
 		struct device *dev = dma->dev;
@@ -105,7 +105,7 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
 	u8 *d, *s;
 
 	if (dma)
-		unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO);
+		unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOWAIT);
 
 	if (unmap) {
 		dma_addr_t dma_dest[2];
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index e1bce26cd4f9..da75777f2b3f 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -182,7 +182,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
 	BUG_ON(src_cnt <= 1);
 
 	if (device)
-		unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOIO);
+		unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOWAIT);
 
 	if (unmap && is_dma_xor_aligned(device, offset, 0, len)) {
 		struct dma_async_tx_descriptor *tx;
@@ -278,7 +278,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
 	BUG_ON(src_cnt <= 1);
 
 	if (device)
-		unmap = dmaengine_get_unmap_data(device->dev, src_cnt, GFP_NOIO);
+		unmap = dmaengine_get_unmap_data(device->dev, src_cnt, GFP_NOWAIT);
 
 	if (unmap && src_cnt <= device->max_xor &&
 	    is_dma_xor_aligned(device, offset, 0, len)) {
diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c
index 707cf6213bc2..b9afb47db7ed 100644
--- a/drivers/acpi/device_sysfs.c
+++ b/drivers/acpi/device_sysfs.c
@@ -104,7 +104,7 @@ static void acpi_expose_nondev_subnodes(struct kobject *kobj,
 
 		init_completion(&dn->kobj_done);
 		ret = kobject_init_and_add(&dn->kobj, &acpi_data_node_ktype,
-					   kobj, dn->name);
+					   kobj, "%s", dn->name);
 		if (ret)
 			acpi_handle_err(dn->handle, "Failed to expose (%d)\n", ret);
 		else
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index d7373ca69c99..25693b045371 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -179,26 +179,21 @@ static int cn_call_callback(struct sk_buff *skb)
  *
  * It checks skb, netlink header and msg sizes, and calls callback helper.
  */
-static void cn_rx_skb(struct sk_buff *__skb)
+static void cn_rx_skb(struct sk_buff *skb)
 {
 	struct nlmsghdr *nlh;
-	struct sk_buff *skb;
 	int len, err;
 
-	skb = skb_get(__skb);
-
 	if (skb->len >= NLMSG_HDRLEN) {
 		nlh = nlmsg_hdr(skb);
 		len = nlmsg_len(nlh);
 
 		if (len < (int)sizeof(struct cn_msg) ||
 		    skb->len < nlh->nlmsg_len ||
-		    len > CONNECTOR_MAX_MSG_SIZE) {
-			kfree_skb(skb);
+		    len > CONNECTOR_MAX_MSG_SIZE)
 			return;
-		}
 
-		err = cn_call_callback(skb);
+		err = cn_call_callback(skb_get(skb));
 		if (err < 0)
 			kfree_skb(skb);
 	}
diff --git a/drivers/dma/mic_x100_dma.c b/drivers/dma/mic_x100_dma.c
index cddfa8dbf4bd..068e920ecb68 100644
--- a/drivers/dma/mic_x100_dma.c
+++ b/drivers/dma/mic_x100_dma.c
@@ -317,7 +317,6 @@ mic_dma_prep_memcpy_lock(struct dma_chan *ch, dma_addr_t dma_dest,
 	struct mic_dma_chan *mic_ch = to_mic_dma_chan(ch);
 	struct device *dev = mic_dma_ch_to_device(mic_ch);
 	int result;
-	struct dma_async_tx_descriptor *tx = NULL;
 
 	if (!len && !flags)
 		return NULL;
@@ -325,13 +324,10 @@ mic_dma_prep_memcpy_lock(struct dma_chan *ch, dma_addr_t dma_dest,
 	spin_lock(&mic_ch->prep_lock);
 	result = mic_dma_do_dma(mic_ch, flags, dma_src, dma_dest, len);
 	if (result >= 0)
-		tx = allocate_tx(mic_ch);
-
-	if (!tx)
-		dev_err(dev, "Error enqueueing dma, error=%d\n", result);
-
+		return allocate_tx(mic_ch);
+	dev_err(dev, "Error enqueueing dma, error=%d\n", result);
 	spin_unlock(&mic_ch->prep_lock);
-	return tx;
+	return NULL;
 }
 
 static struct dma_async_tx_descriptor *
@@ -339,14 +335,13 @@ mic_dma_prep_interrupt_lock(struct dma_chan *ch, unsigned long flags)
 {
 	struct mic_dma_chan *mic_ch = to_mic_dma_chan(ch);
 	int ret;
-	struct dma_async_tx_descriptor *tx = NULL;
 
 	spin_lock(&mic_ch->prep_lock);
 	ret = mic_dma_do_dma(mic_ch, flags, 0, 0, 0);
 	if (!ret)
-		tx = allocate_tx(mic_ch);
+		return allocate_tx(mic_ch);
 	spin_unlock(&mic_ch->prep_lock);
-	return tx;
+	return NULL;
 }
 
 /* Return the status of the transaction */
diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c
index 9dfa2b0fa5da..9cb93c5b655d 100644
--- a/drivers/dma/xgene-dma.c
+++ b/drivers/dma/xgene-dma.c
@@ -29,6 +29,7 @@
 #include <linux/dmapool.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
+#include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 
@@ -1610,6 +1611,7 @@ static int xgene_dma_request_irqs(struct xgene_dma *pdma)
 	/* Register DMA channel rx irq */
 	for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
 		chan = &pdma->chan[i];
+		irq_set_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY);
 		ret = devm_request_irq(chan->dev, chan->rx_irq,
 				       xgene_dma_chan_ring_isr,
 				       0, chan->name, chan);
@@ -1620,6 +1622,7 @@ static int xgene_dma_request_irqs(struct xgene_dma *pdma)
 
 			for (j = 0; j < i; j++) {
 				chan = &pdma->chan[i];
+				irq_clear_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY);
 				devm_free_irq(chan->dev, chan->rx_irq, chan);
 			}
 
@@ -1640,6 +1643,7 @@ static void xgene_dma_free_irqs(struct xgene_dma *pdma)
 
 	for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
 		chan = &pdma->chan[i];
+		irq_clear_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY);
 		devm_free_irq(chan->dev, chan->rx_irq, chan);
 	}
 }
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index ac1ce4a73edf..0e08e665f715 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -521,6 +521,7 @@ static int __init dmi_present(const u8 *buf)
 			dmi_ver = smbios_ver;
 		else
 			dmi_ver = (buf[14] & 0xF0) << 4 | (buf[14] & 0x0F);
+		dmi_ver <<= 8;
 		dmi_num = get_unaligned_le16(buf + 12);
 		dmi_len = get_unaligned_le16(buf + 6);
 		dmi_base = get_unaligned_le32(buf + 8);
@@ -528,15 +529,14 @@ static int __init dmi_present(const u8 *buf)
 		if (dmi_walk_early(dmi_decode) == 0) {
 			if (smbios_ver) {
 				pr_info("SMBIOS %d.%d present.\n",
-				       dmi_ver >> 8, dmi_ver & 0xFF);
+					dmi_ver >> 16, (dmi_ver >> 8) & 0xFF);
 			} else {
 				smbios_entry_point_size = 15;
 				memcpy(smbios_entry_point, buf,
 				       smbios_entry_point_size);
 				pr_info("Legacy DMI %d.%d present.\n",
-				       dmi_ver >> 8, dmi_ver & 0xFF);
+					dmi_ver >> 16, (dmi_ver >> 8) & 0xFF);
 			}
-			dmi_ver <<= 8;
 			dmi_format_ids(dmi_ids_string, sizeof(dmi_ids_string));
 			printk(KERN_DEBUG "DMI: %s\n", dmi_ids_string);
 			return 0;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c
index ffa902ece872..05a895496fc6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c
@@ -156,6 +156,7 @@ nv40_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
 		return -ENOMEM;
 	nvkm_object_ctor(&nv40_gr_chan, oclass, &chan->object);
 	chan->gr = gr;
+	chan->fifo = fifoch;
 	*pobject = &chan->object;
 
 	spin_lock_irqsave(&chan->gr->base.engine.lock, flags);
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index b9094e9da537..a1e75cba18e0 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -263,81 +263,6 @@ config EXYNOS_IOMMU_DEBUG
 
 	  Say N unless you need kernel log message for IOMMU debugging.
 
-config SHMOBILE_IPMMU
-	bool
-
-config SHMOBILE_IPMMU_TLB
-	bool
-
-config SHMOBILE_IOMMU
-	bool "IOMMU for Renesas IPMMU/IPMMUI"
-	default n
-	depends on ARM && MMU
-	depends on ARCH_SHMOBILE || COMPILE_TEST
-	select IOMMU_API
-	select ARM_DMA_USE_IOMMU
-	select SHMOBILE_IPMMU
-	select SHMOBILE_IPMMU_TLB
-	help
-	  Support for Renesas IPMMU/IPMMUI. This option enables
-	  remapping of DMA memory accesses from all of the IP blocks
-	  on the ICB.
-
-	  Warning: Drivers (including userspace drivers of UIO
-	  devices) of the IP blocks on the ICB *must* use addresses
-	  allocated from the IPMMU (iova) for DMA with this option
-	  enabled.
-
-	  If unsure, say N.
-
-choice
-	prompt "IPMMU/IPMMUI address space size"
-	default SHMOBILE_IOMMU_ADDRSIZE_2048MB
-	depends on SHMOBILE_IOMMU
-	help
-	  This option sets IPMMU/IPMMUI address space size by
-	  adjusting the 1st level page table size. The page table size
-	  is calculated as follows:
-
-	      page table size = number of page table entries * 4 bytes
-	      number of page table entries = address space size / 1 MiB
-
-	  For example, when the address space size is 2048 MiB, the
-	  1st level page table size is 8192 bytes.
-
-	config SHMOBILE_IOMMU_ADDRSIZE_2048MB
-		bool "2 GiB"
-
-	config SHMOBILE_IOMMU_ADDRSIZE_1024MB
-		bool "1 GiB"
-
-	config SHMOBILE_IOMMU_ADDRSIZE_512MB
-		bool "512 MiB"
-
-	config SHMOBILE_IOMMU_ADDRSIZE_256MB
-		bool "256 MiB"
-
-	config SHMOBILE_IOMMU_ADDRSIZE_128MB
-		bool "128 MiB"
-
-	config SHMOBILE_IOMMU_ADDRSIZE_64MB
-		bool "64 MiB"
-
-	config SHMOBILE_IOMMU_ADDRSIZE_32MB
-		bool "32 MiB"
-
-endchoice
-
-config SHMOBILE_IOMMU_L1SIZE
-	int
-	default 8192 if SHMOBILE_IOMMU_ADDRSIZE_2048MB
-	default 4096 if SHMOBILE_IOMMU_ADDRSIZE_1024MB
-	default 2048 if SHMOBILE_IOMMU_ADDRSIZE_512MB
-	default 1024 if SHMOBILE_IOMMU_ADDRSIZE_256MB
-	default 512 if SHMOBILE_IOMMU_ADDRSIZE_128MB
-	default 256 if SHMOBILE_IOMMU_ADDRSIZE_64MB
-	default 128 if SHMOBILE_IOMMU_ADDRSIZE_32MB
-
 config IPMMU_VMSA
 	bool "Renesas VMSA-compatible IPMMU"
 	depends on ARM_LPAE
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 68faca02225d..42fc0c25cf1a 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -22,7 +22,5 @@ obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o
 obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o
 obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
 obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
-obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o
-obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o
 obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
 obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 8b2be1e7714f..539b0dea8034 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -35,6 +35,7 @@
 #include <linux/msi.h>
 #include <linux/dma-contiguous.h>
 #include <linux/irqdomain.h>
+#include <linux/percpu.h>
 #include <asm/irq_remapping.h>
 #include <asm/io_apic.h>
 #include <asm/apic.h>
@@ -114,6 +115,45 @@ struct kmem_cache *amd_iommu_irq_cache;
 static void update_domain(struct protection_domain *domain);
 static int protection_domain_init(struct protection_domain *domain);
 
+/*
+ * For dynamic growth the aperture size is split into ranges of 128MB of
+ * DMA address space each. This struct represents one such range.
+ */
+struct aperture_range {
+
+	spinlock_t bitmap_lock;
+
+	/* address allocation bitmap */
+	unsigned long *bitmap;
+	unsigned long offset;
+	unsigned long next_bit;
+
+	/*
+	 * Array of PTE pages for the aperture. In this array we save all the
+	 * leaf pages of the domain page table used for the aperture. This way
+	 * we don't need to walk the page table to find a specific PTE. We can
+	 * just calculate its address in constant time.
+	 */
+	u64 *pte_pages[64];
+};
+
+/*
+ * Data container for a dma_ops specific protection domain
+ */
+struct dma_ops_domain {
+	/* generic protection domain information */
+	struct protection_domain domain;
+
+	/* size of the aperture for the mappings */
+	unsigned long aperture_size;
+
+	/* aperture index we start searching for free addresses */
+	u32 __percpu *next_index;
+
+	/* address space relevant data */
+	struct aperture_range *aperture[APERTURE_MAX_RANGES];
+};
+
 /****************************************************************************
  *
  * Helper functions
@@ -1167,11 +1207,21 @@ static u64 *alloc_pte(struct protection_domain *domain,
 	end_lvl = PAGE_SIZE_LEVEL(page_size);
 
 	while (level > end_lvl) {
-		if (!IOMMU_PTE_PRESENT(*pte)) {
+		u64 __pte, __npte;
+
+		__pte = *pte;
+
+		if (!IOMMU_PTE_PRESENT(__pte)) {
 			page = (u64 *)get_zeroed_page(gfp);
 			if (!page)
 				return NULL;
-			*pte = PM_LEVEL_PDE(level, virt_to_phys(page));
+
+			__npte = PM_LEVEL_PDE(level, virt_to_phys(page));
+
+			if (cmpxchg64(pte, __pte, __npte)) {
+				free_page((unsigned long)page);
+				continue;
+			}
 		}
 
 		/* No level skipping support yet */
@@ -1376,8 +1426,10 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
 			   bool populate, gfp_t gfp)
 {
 	int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
-	struct amd_iommu *iommu;
 	unsigned long i, old_size, pte_pgsize;
+	struct aperture_range *range;
+	struct amd_iommu *iommu;
+	unsigned long flags;
 
 #ifdef CONFIG_IOMMU_STRESS
 	populate = false;
@@ -1386,15 +1438,17 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
 	if (index >= APERTURE_MAX_RANGES)
 		return -ENOMEM;
 
-	dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
-	if (!dma_dom->aperture[index])
+	range = kzalloc(sizeof(struct aperture_range), gfp);
+	if (!range)
 		return -ENOMEM;
 
-	dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
-	if (!dma_dom->aperture[index]->bitmap)
+	range->bitmap = (void *)get_zeroed_page(gfp);
+	if (!range->bitmap)
 		goto out_free;
 
-	dma_dom->aperture[index]->offset = dma_dom->aperture_size;
+	range->offset = dma_dom->aperture_size;
+
+	spin_lock_init(&range->bitmap_lock);
 
 	if (populate) {
 		unsigned long address = dma_dom->aperture_size;
@@ -1407,14 +1461,20 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
 			if (!pte)
 				goto out_free;
 
-			dma_dom->aperture[index]->pte_pages[i] = pte_page;
+			range->pte_pages[i] = pte_page;
 
 			address += APERTURE_RANGE_SIZE / 64;
 		}
 	}
 
-	old_size                = dma_dom->aperture_size;
-	dma_dom->aperture_size += APERTURE_RANGE_SIZE;
+	spin_lock_irqsave(&dma_dom->domain.lock, flags);
+
+	/* First take the bitmap_lock and then publish the range */
+	spin_lock(&range->bitmap_lock);
+
+	old_size                 = dma_dom->aperture_size;
+	dma_dom->aperture[index] = range;
+	dma_dom->aperture_size  += APERTURE_RANGE_SIZE;
 
 	/* Reserve address range used for MSI messages */
 	if (old_size < MSI_ADDR_BASE_LO &&
@@ -1461,62 +1521,123 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
 
 	update_domain(&dma_dom->domain);
 
+	spin_unlock(&range->bitmap_lock);
+
+	spin_unlock_irqrestore(&dma_dom->domain.lock, flags);
+
 	return 0;
 
 out_free:
 	update_domain(&dma_dom->domain);
 
-	free_page((unsigned long)dma_dom->aperture[index]->bitmap);
+	free_page((unsigned long)range->bitmap);
 
-	kfree(dma_dom->aperture[index]);
-	dma_dom->aperture[index] = NULL;
+	kfree(range);
 
 	return -ENOMEM;
 }
 
+static dma_addr_t dma_ops_aperture_alloc(struct dma_ops_domain *dom,
+					 struct aperture_range *range,
+					 unsigned long pages,
+					 unsigned long dma_mask,
+					 unsigned long boundary_size,
+					 unsigned long align_mask,
+					 bool trylock)
+{
+	unsigned long offset, limit, flags;
+	dma_addr_t address;
+	bool flush = false;
+
+	offset = range->offset >> PAGE_SHIFT;
+	limit  = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
+					dma_mask >> PAGE_SHIFT);
+
+	if (trylock) {
+		if (!spin_trylock_irqsave(&range->bitmap_lock, flags))
+			return -1;
+	} else {
+		spin_lock_irqsave(&range->bitmap_lock, flags);
+	}
+
+	address = iommu_area_alloc(range->bitmap, limit, range->next_bit,
+				   pages, offset, boundary_size, align_mask);
+	if (address == -1) {
+		/* Nothing found, retry one time */
+		address = iommu_area_alloc(range->bitmap, limit,
+					   0, pages, offset, boundary_size,
+					   align_mask);
+		flush = true;
+	}
+
+	if (address != -1)
+		range->next_bit = address + pages;
+
+	spin_unlock_irqrestore(&range->bitmap_lock, flags);
+
+	if (flush) {
+		domain_flush_tlb(&dom->domain);
+		domain_flush_complete(&dom->domain);
+	}
+
+	return address;
+}
+
 static unsigned long dma_ops_area_alloc(struct device *dev,
 					struct dma_ops_domain *dom,
 					unsigned int pages,
 					unsigned long align_mask,
-					u64 dma_mask,
-					unsigned long start)
+					u64 dma_mask)
 {
-	unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
-	int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
-	int i = start >> APERTURE_RANGE_SHIFT;
 	unsigned long boundary_size, mask;
 	unsigned long address = -1;
-	unsigned long limit;
+	bool first = true;
+	u32 start, i;
 
-	next_bit >>= PAGE_SHIFT;
+	preempt_disable();
 
 	mask = dma_get_seg_boundary(dev);
 
+again:
+	start = this_cpu_read(*dom->next_index);
+
+	/* Sanity check - is it really necessary? */
+	if (unlikely(start > APERTURE_MAX_RANGES)) {
+		start = 0;
+		this_cpu_write(*dom->next_index, 0);
+	}
+
 	boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT :
 				   1UL << (BITS_PER_LONG - PAGE_SHIFT);
 
-	for (;i < max_index; ++i) {
-		unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
+	for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
+		struct aperture_range *range;
+		int index;
 
-		if (dom->aperture[i]->offset >= dma_mask)
-			break;
+		index = (start + i) % APERTURE_MAX_RANGES;
 
-		limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
-					       dma_mask >> PAGE_SHIFT);
+		range = dom->aperture[index];
 
-		address = iommu_area_alloc(dom->aperture[i]->bitmap,
-					   limit, next_bit, pages, 0,
-					    boundary_size, align_mask);
+		if (!range || range->offset >= dma_mask)
+			continue;
+
+		address = dma_ops_aperture_alloc(dom, range, pages,
+						 dma_mask, boundary_size,
+						 align_mask, first);
 		if (address != -1) {
-			address = dom->aperture[i]->offset +
-				  (address << PAGE_SHIFT);
-			dom->next_address = address + (pages << PAGE_SHIFT);
+			address = range->offset + (address << PAGE_SHIFT);
+			this_cpu_write(*dom->next_index, index);
 			break;
 		}
+	}
 
-		next_bit = 0;
+	if (address == -1 && first) {
+		first = false;
+		goto again;
 	}
 
+	preempt_enable();
+
 	return address;
 }
 
@@ -1526,21 +1647,14 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
 					     unsigned long align_mask,
 					     u64 dma_mask)
 {
-	unsigned long address;
-
-#ifdef CONFIG_IOMMU_STRESS
-	dom->next_address = 0;
-	dom->need_flush = true;
-#endif
+	unsigned long address = -1;
 
-	address = dma_ops_area_alloc(dev, dom, pages, align_mask,
-				     dma_mask, dom->next_address);
+	while (address == -1) {
+		address = dma_ops_area_alloc(dev, dom, pages,
+					     align_mask, dma_mask);
 
-	if (address == -1) {
-		dom->next_address = 0;
-		address = dma_ops_area_alloc(dev, dom, pages, align_mask,
-					     dma_mask, 0);
-		dom->need_flush = true;
+		if (address == -1 && alloc_new_range(dom, false, GFP_ATOMIC))
+			break;
 	}
 
 	if (unlikely(address == -1))
@@ -1562,6 +1676,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 {
 	unsigned i = address >> APERTURE_RANGE_SHIFT;
 	struct aperture_range *range = dom->aperture[i];
+	unsigned long flags;
 
 	BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
 
@@ -1570,12 +1685,18 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 		return;
 #endif
 
-	if (address >= dom->next_address)
-		dom->need_flush = true;
+	if (amd_iommu_unmap_flush) {
+		domain_flush_tlb(&dom->domain);
+		domain_flush_complete(&dom->domain);
+	}
 
 	address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
 
+	spin_lock_irqsave(&range->bitmap_lock, flags);
+	if (address + pages > range->next_bit)
+		range->next_bit = address + pages;
 	bitmap_clear(range->bitmap, address, pages);
+	spin_unlock_irqrestore(&range->bitmap_lock, flags);
 
 }
 
@@ -1755,6 +1876,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 	if (!dom)
 		return;
 
+	free_percpu(dom->next_index);
+
 	del_domain_from_list(&dom->domain);
 
 	free_pagetable(&dom->domain);
@@ -1769,6 +1892,23 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 	kfree(dom);
 }
 
+static int dma_ops_domain_alloc_apertures(struct dma_ops_domain *dma_dom,
+					  int max_apertures)
+{
+	int ret, i, apertures;
+
+	apertures = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
+	ret       = 0;
+
+	for (i = apertures; i < max_apertures; ++i) {
+		ret = alloc_new_range(dma_dom, false, GFP_KERNEL);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
 /*
  * Allocates a new protection domain usable for the dma_ops functions.
  * It also initializes the page table and the address allocator data
@@ -1777,6 +1917,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 static struct dma_ops_domain *dma_ops_domain_alloc(void)
 {
 	struct dma_ops_domain *dma_dom;
+	int cpu;
 
 	dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
 	if (!dma_dom)
@@ -1785,6 +1926,10 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
 	if (protection_domain_init(&dma_dom->domain))
 		goto free_dma_dom;
 
+	dma_dom->next_index = alloc_percpu(u32);
+	if (!dma_dom->next_index)
+		goto free_dma_dom;
+
 	dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
 	dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
 	dma_dom->domain.flags = PD_DMA_OPS_MASK;
@@ -1792,8 +1937,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
 	if (!dma_dom->domain.pt_root)
 		goto free_dma_dom;
 
-	dma_dom->need_flush = false;
-
 	add_domain_to_list(&dma_dom->domain);
 
 	if (alloc_new_range(dma_dom, true, GFP_KERNEL))
@@ -1804,8 +1947,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
 	 * a valid dma-address. So we can use 0 as error value
 	 */
 	dma_dom->aperture[0]->bitmap[0] = 1;
-	dma_dom->next_address = 0;
 
+	for_each_possible_cpu(cpu)
+		*per_cpu_ptr(dma_dom->next_index, cpu) = 0;
 
 	return dma_dom;
 
@@ -2328,7 +2472,7 @@ static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom,
 	else if (direction == DMA_BIDIRECTIONAL)
 		__pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
 
-	WARN_ON(*pte);
+	WARN_ON_ONCE(*pte);
 
 	*pte = __pte;
 
@@ -2357,7 +2501,7 @@ static void dma_ops_domain_unmap(struct dma_ops_domain *dom,
 
 	pte += PM_LEVEL_INDEX(0, address);
 
-	WARN_ON(!*pte);
+	WARN_ON_ONCE(!*pte);
 
 	*pte = 0ULL;
 }
@@ -2393,26 +2537,11 @@ static dma_addr_t __map_single(struct device *dev,
 	if (align)
 		align_mask = (1UL << get_order(size)) - 1;
 
-retry:
 	address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
 					  dma_mask);
-	if (unlikely(address == DMA_ERROR_CODE)) {
-		/*
-		 * setting next_address here will let the address
-		 * allocator only scan the new allocated range in the
-		 * first run. This is a small optimization.
-		 */
-		dma_dom->next_address = dma_dom->aperture_size;
 
-		if (alloc_new_range(dma_dom, false, GFP_ATOMIC))
-			goto out;
-
-		/*
-		 * aperture was successfully enlarged by 128 MB, try
-		 * allocation again
-		 */
-		goto retry;
-	}
+	if (address == DMA_ERROR_CODE)
+		goto out;
 
 	start = address;
 	for (i = 0; i < pages; ++i) {
@@ -2427,11 +2556,10 @@ retry:
 
 	ADD_STATS_COUNTER(alloced_io_mem, size);
 
-	if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
-		domain_flush_tlb(&dma_dom->domain);
-		dma_dom->need_flush = false;
-	} else if (unlikely(amd_iommu_np_cache))
+	if (unlikely(amd_iommu_np_cache)) {
 		domain_flush_pages(&dma_dom->domain, address, size);
+		domain_flush_complete(&dma_dom->domain);
+	}
 
 out:
 	return address;
@@ -2478,11 +2606,6 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
 	SUB_STATS_COUNTER(alloced_io_mem, size);
 
 	dma_ops_free_addresses(dma_dom, dma_addr, pages);
-
-	if (amd_iommu_unmap_flush || dma_dom->need_flush) {
-		domain_flush_pages(&dma_dom->domain, flush_addr, size);
-		dma_dom->need_flush = false;
-	}
 }
 
 /*
@@ -2493,11 +2616,9 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
 			   enum dma_data_direction dir,
 			   struct dma_attrs *attrs)
 {
-	unsigned long flags;
+	phys_addr_t paddr = page_to_phys(page) + offset;
 	struct protection_domain *domain;
-	dma_addr_t addr;
 	u64 dma_mask;
-	phys_addr_t paddr = page_to_phys(page) + offset;
 
 	INC_STATS_COUNTER(cnt_map_single);
 
@@ -2509,19 +2630,8 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
 
 	dma_mask = *dev->dma_mask;
 
-	spin_lock_irqsave(&domain->lock, flags);
-
-	addr = __map_single(dev, domain->priv, paddr, size, dir, false,
+	return __map_single(dev, domain->priv, paddr, size, dir, false,
 			    dma_mask);
-	if (addr == DMA_ERROR_CODE)
-		goto out;
-
-	domain_flush_complete(domain);
-
-out:
-	spin_unlock_irqrestore(&domain->lock, flags);
-
-	return addr;
 }
 
 /*
@@ -2530,7 +2640,6 @@ out:
 static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
 		       enum dma_data_direction dir, struct dma_attrs *attrs)
 {
-	unsigned long flags;
 	struct protection_domain *domain;
 
 	INC_STATS_COUNTER(cnt_unmap_single);
@@ -2539,13 +2648,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
 	if (IS_ERR(domain))
 		return;
 
-	spin_lock_irqsave(&domain->lock, flags);
-
 	__unmap_single(domain->priv, dma_addr, size, dir);
-
-	domain_flush_complete(domain);
-
-	spin_unlock_irqrestore(&domain->lock, flags);
 }
 
 /*
@@ -2556,7 +2659,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 		  int nelems, enum dma_data_direction dir,
 		  struct dma_attrs *attrs)
 {
-	unsigned long flags;
 	struct protection_domain *domain;
 	int i;
 	struct scatterlist *s;
@@ -2572,8 +2674,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 
 	dma_mask = *dev->dma_mask;
 
-	spin_lock_irqsave(&domain->lock, flags);
-
 	for_each_sg(sglist, s, nelems, i) {
 		paddr = sg_phys(s);
 
@@ -2588,12 +2688,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 			goto unmap;
 	}
 
-	domain_flush_complete(domain);
-
-out:
-	spin_unlock_irqrestore(&domain->lock, flags);
-
 	return mapped_elems;
+
 unmap:
 	for_each_sg(sglist, s, mapped_elems, i) {
 		if (s->dma_address)
@@ -2602,9 +2698,7 @@ unmap:
 		s->dma_address = s->dma_length = 0;
 	}
 
-	mapped_elems = 0;
-
-	goto out;
+	return 0;
 }
 
 /*
@@ -2615,7 +2709,6 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 		     int nelems, enum dma_data_direction dir,
 		     struct dma_attrs *attrs)
 {
-	unsigned long flags;
 	struct protection_domain *domain;
 	struct scatterlist *s;
 	int i;
@@ -2626,17 +2719,11 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 	if (IS_ERR(domain))
 		return;
 
-	spin_lock_irqsave(&domain->lock, flags);
-
 	for_each_sg(sglist, s, nelems, i) {
 		__unmap_single(domain->priv, s->dma_address,
 			       s->dma_length, dir);
 		s->dma_address = s->dma_length = 0;
 	}
-
-	domain_flush_complete(domain);
-
-	spin_unlock_irqrestore(&domain->lock, flags);
 }
 
 /*
@@ -2648,7 +2735,6 @@ static void *alloc_coherent(struct device *dev, size_t size,
 {
 	u64 dma_mask = dev->coherent_dma_mask;
 	struct protection_domain *domain;
-	unsigned long flags;
 	struct page *page;
 
 	INC_STATS_COUNTER(cnt_alloc_coherent);
@@ -2680,19 +2766,11 @@ static void *alloc_coherent(struct device *dev, size_t size,
 	if (!dma_mask)
 		dma_mask = *dev->dma_mask;
 
-	spin_lock_irqsave(&domain->lock, flags);
-
 	*dma_addr = __map_single(dev, domain->priv, page_to_phys(page),
 				 size, DMA_BIDIRECTIONAL, true, dma_mask);
 
-	if (*dma_addr == DMA_ERROR_CODE) {
-		spin_unlock_irqrestore(&domain->lock, flags);
+	if (*dma_addr == DMA_ERROR_CODE)
 		goto out_free;
-	}
-
-	domain_flush_complete(domain);
-
-	spin_unlock_irqrestore(&domain->lock, flags);
 
 	return page_address(page);
 
@@ -2712,7 +2790,6 @@ static void free_coherent(struct device *dev, size_t size,
 			  struct dma_attrs *attrs)
 {
 	struct protection_domain *domain;
-	unsigned long flags;
 	struct page *page;
 
 	INC_STATS_COUNTER(cnt_free_coherent);
@@ -2724,14 +2801,8 @@ static void free_coherent(struct device *dev, size_t size,
 	if (IS_ERR(domain))
 		goto free_mem;
 
-	spin_lock_irqsave(&domain->lock, flags);
-
 	__unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
 
-	domain_flush_complete(domain);
-
-	spin_unlock_irqrestore(&domain->lock, flags);
-
 free_mem:
 	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
 		__free_pages(page, get_order(size));
@@ -2746,14 +2817,43 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
 	return check_device(dev);
 }
 
+static int set_dma_mask(struct device *dev, u64 mask)
+{
+	struct protection_domain *domain;
+	int max_apertures = 1;
+
+	domain = get_domain(dev);
+	if (IS_ERR(domain))
+		return PTR_ERR(domain);
+
+	if (mask == DMA_BIT_MASK(64))
+		max_apertures = 8;
+	else if (mask > DMA_BIT_MASK(32))
+		max_apertures = 4;
+
+	/*
+	 * To prevent lock contention it doesn't make sense to allocate more
+	 * apertures than online cpus
+	 */
+	if (max_apertures > num_online_cpus())
+		max_apertures = num_online_cpus();
+
+	if (dma_ops_domain_alloc_apertures(domain->priv, max_apertures))
+		dev_err(dev, "Can't allocate %d iommu apertures\n",
+			max_apertures);
+
+	return 0;
+}
+
 static struct dma_map_ops amd_iommu_dma_ops = {
-	.alloc = alloc_coherent,
-	.free = free_coherent,
-	.map_page = map_page,
-	.unmap_page = unmap_page,
-	.map_sg = map_sg,
-	.unmap_sg = unmap_sg,
-	.dma_supported = amd_iommu_dma_supported,
+	.alloc		= alloc_coherent,
+	.free		= free_coherent,
+	.map_page	= map_page,
+	.unmap_page	= unmap_page,
+	.map_sg		= map_sg,
+	.unmap_sg	= unmap_sg,
+	.dma_supported	= amd_iommu_dma_supported,
+	.set_dma_mask	= set_dma_mask,
 };
 
 int __init amd_iommu_init_api(void)
@@ -3757,11 +3857,9 @@ static struct irq_domain *get_irq_domain(struct irq_alloc_info *info)
 	case X86_IRQ_ALLOC_TYPE_MSI:
 	case X86_IRQ_ALLOC_TYPE_MSIX:
 		devid = get_device_id(&info->msi_dev->dev);
-		if (devid >= 0) {
-			iommu = amd_iommu_rlookup_table[devid];
-			if (iommu)
-				return iommu->msi_domain;
-		}
+		iommu = amd_iommu_rlookup_table[devid];
+		if (iommu)
+			return iommu->msi_domain;
 		break;
 	default:
 		break;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index b08cf57bf455..9d32b20a5e9a 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -425,46 +425,6 @@ struct protection_domain {
 };
 
 /*
- * For dynamic growth the aperture size is split into ranges of 128MB of
- * DMA address space each. This struct represents one such range.
- */
-struct aperture_range {
-
-	/* address allocation bitmap */
-	unsigned long *bitmap;
-
-	/*
-	 * Array of PTE pages for the aperture. In this array we save all the
-	 * leaf pages of the domain page table used for the aperture. This way
-	 * we don't need to walk the page table to find a specific PTE. We can
-	 * just calculate its address in constant time.
-	 */
-	u64 *pte_pages[64];
-
-	unsigned long offset;
-};
-
-/*
- * Data container for a dma_ops specific protection domain
- */
-struct dma_ops_domain {
-	/* generic protection domain information */
-	struct protection_domain domain;
-
-	/* size of the aperture for the mappings */
-	unsigned long aperture_size;
-
-	/* address we start to search for free addresses */
-	unsigned long next_address;
-
-	/* address space relevant data */
-	struct aperture_range *aperture[APERTURE_MAX_RANGES];
-
-	/* This will be set to true when TLB needs to be flushed */
-	bool need_flush;
-};
-
-/*
  * Structure where we save information about one hardware AMD IOMMU in the
  * system.
  */
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 7caf2fa237f2..c865737326e1 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -432,7 +432,7 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
 	unbind_pasid(pasid_state);
 }
 
-static struct mmu_notifier_ops iommu_mn = {
+static const struct mmu_notifier_ops iommu_mn = {
 	.release		= mn_release,
 	.clear_flush_young      = mn_clear_flush_young,
 	.invalidate_page        = mn_invalidate_page,
@@ -513,43 +513,39 @@ static bool access_error(struct vm_area_struct *vma, struct fault *fault)
 static void do_fault(struct work_struct *work)
 {
 	struct fault *fault = container_of(work, struct fault, work);
-	struct mm_struct *mm;
 	struct vm_area_struct *vma;
+	int ret = VM_FAULT_ERROR;
+	unsigned int flags = 0;
+	struct mm_struct *mm;
 	u64 address;
-	int ret, write;
-
-	write = !!(fault->flags & PPR_FAULT_WRITE);
 
 	mm = fault->state->mm;
 	address = fault->address;
 
+	if (fault->flags & PPR_FAULT_USER)
+		flags |= FAULT_FLAG_USER;
+	if (fault->flags & PPR_FAULT_WRITE)
+		flags |= FAULT_FLAG_WRITE;
+
 	down_read(&mm->mmap_sem);
 	vma = find_extend_vma(mm, address);
-	if (!vma || address < vma->vm_start) {
+	if (!vma || address < vma->vm_start)
 		/* failed to get a vma in the right range */
-		up_read(&mm->mmap_sem);
-		handle_fault_error(fault);
 		goto out;
-	}
 
 	/* Check if we have the right permissions on the vma */
-	if (access_error(vma, fault)) {
-		up_read(&mm->mmap_sem);
-		handle_fault_error(fault);
+	if (access_error(vma, fault))
 		goto out;
-	}
 
-	ret = handle_mm_fault(mm, vma, address, write);
-	if (ret & VM_FAULT_ERROR) {
-		/* failed to service fault */
-		up_read(&mm->mmap_sem);
-		handle_fault_error(fault);
-		goto out;
-	}
+	ret = handle_mm_fault(mm, vma, address, flags);
 
+out:
 	up_read(&mm->mmap_sem);
 
-out:
+	if (ret & VM_FAULT_ERROR)
+		/* failed to service fault */
+		handle_fault_error(fault);
+
 	finish_pri_tag(fault->dev_state, fault->state, fault->tag);
 
 	put_pasid_state(fault->state);
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 4e5118a4cd30..20875341c865 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -40,7 +40,10 @@
 #define IDR0_ST_LVL_SHIFT		27
 #define IDR0_ST_LVL_MASK		0x3
 #define IDR0_ST_LVL_2LVL		(1 << IDR0_ST_LVL_SHIFT)
-#define IDR0_STALL_MODEL		(3 << 24)
+#define IDR0_STALL_MODEL_SHIFT		24
+#define IDR0_STALL_MODEL_MASK		0x3
+#define IDR0_STALL_MODEL_STALL		(0 << IDR0_STALL_MODEL_SHIFT)
+#define IDR0_STALL_MODEL_FORCE		(2 << IDR0_STALL_MODEL_SHIFT)
 #define IDR0_TTENDIAN_SHIFT		21
 #define IDR0_TTENDIAN_MASK		0x3
 #define IDR0_TTENDIAN_LE		(2 << IDR0_TTENDIAN_SHIFT)
@@ -253,6 +256,9 @@
 #define STRTAB_STE_1_STRW_EL2		2UL
 #define STRTAB_STE_1_STRW_SHIFT		30
 
+#define STRTAB_STE_1_SHCFG_INCOMING	1UL
+#define STRTAB_STE_1_SHCFG_SHIFT	44
+
 #define STRTAB_STE_2_S2VMID_SHIFT	0
 #define STRTAB_STE_2_S2VMID_MASK	0xffffUL
 #define STRTAB_STE_2_VTCR_SHIFT		32
@@ -378,7 +384,6 @@
 #define PRIQ_0_SID_MASK			0xffffffffUL
 #define PRIQ_0_SSID_SHIFT		32
 #define PRIQ_0_SSID_MASK		0xfffffUL
-#define PRIQ_0_OF			(1UL << 57)
 #define PRIQ_0_PERM_PRIV		(1UL << 58)
 #define PRIQ_0_PERM_EXEC		(1UL << 59)
 #define PRIQ_0_PERM_READ		(1UL << 60)
@@ -855,15 +860,17 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 	};
 
 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
-		cerror_str[idx]);
+		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
 
 	switch (idx) {
-	case CMDQ_ERR_CERROR_ILL_IDX:
-		break;
 	case CMDQ_ERR_CERROR_ABT_IDX:
 		dev_err(smmu->dev, "retrying command fetch\n");
 	case CMDQ_ERR_CERROR_NONE_IDX:
 		return;
+	case CMDQ_ERR_CERROR_ILL_IDX:
+		/* Fallthrough */
+	default:
+		break;
 	}
 
 	/*
@@ -1042,6 +1049,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
 		val |= disable_bypass ? STRTAB_STE_0_CFG_ABORT
 				      : STRTAB_STE_0_CFG_BYPASS;
 		dst[0] = cpu_to_le64(val);
+		dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING
+			 << STRTAB_STE_1_SHCFG_SHIFT);
 		dst[2] = 0; /* Nuke the VMID */
 		if (ste_live)
 			arm_smmu_sync_ste_for_sid(smmu, sid);
@@ -1056,12 +1065,14 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
 			 STRTAB_STE_1_S1C_CACHE_WBRA
 			 << STRTAB_STE_1_S1COR_SHIFT |
 			 STRTAB_STE_1_S1C_SH_ISH << STRTAB_STE_1_S1CSH_SHIFT |
-			 STRTAB_STE_1_S1STALLD |
 #ifdef CONFIG_PCI_ATS
 			 STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT |
 #endif
 			 STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
 
+		if (smmu->features & ARM_SMMU_FEAT_STALLS)
+			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
+
 		val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK
 		        << STRTAB_STE_0_S1CTXPTR_SHIFT) |
 			STRTAB_STE_0_CFG_S1_TRANS;
@@ -1123,8 +1134,8 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
 
 	desc->span = STRTAB_SPLIT + 1;
-	desc->l2ptr = dma_zalloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
-					  GFP_KERNEL);
+	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
+					  GFP_KERNEL | __GFP_ZERO);
 	if (!desc->l2ptr) {
 		dev_err(smmu->dev,
 			"failed to allocate l2 stream table for SID %u\n",
@@ -1250,50 +1261,50 @@ static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
 
 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
 {
-	u32 gerror, gerrorn;
+	u32 gerror, gerrorn, active;
 	struct arm_smmu_device *smmu = dev;
 
 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
 
-	gerror ^= gerrorn;
-	if (!(gerror & GERROR_ERR_MASK))
+	active = gerror ^ gerrorn;
+	if (!(active & GERROR_ERR_MASK))
 		return IRQ_NONE; /* No errors pending */
 
 	dev_warn(smmu->dev,
 		 "unexpected global error reported (0x%08x), this could be serious\n",
-		 gerror);
+		 active);
 
-	if (gerror & GERROR_SFM_ERR) {
+	if (active & GERROR_SFM_ERR) {
 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
 		arm_smmu_device_disable(smmu);
 	}
 
-	if (gerror & GERROR_MSI_GERROR_ABT_ERR)
+	if (active & GERROR_MSI_GERROR_ABT_ERR)
 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
 
-	if (gerror & GERROR_MSI_PRIQ_ABT_ERR) {
+	if (active & GERROR_MSI_PRIQ_ABT_ERR) {
 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
 		arm_smmu_priq_handler(irq, smmu->dev);
 	}
 
-	if (gerror & GERROR_MSI_EVTQ_ABT_ERR) {
+	if (active & GERROR_MSI_EVTQ_ABT_ERR) {
 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
 		arm_smmu_evtq_handler(irq, smmu->dev);
 	}
 
-	if (gerror & GERROR_MSI_CMDQ_ABT_ERR) {
+	if (active & GERROR_MSI_CMDQ_ABT_ERR) {
 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
 		arm_smmu_cmdq_sync_handler(irq, smmu->dev);
 	}
 
-	if (gerror & GERROR_PRIQ_ABT_ERR)
+	if (active & GERROR_PRIQ_ABT_ERR)
 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
 
-	if (gerror & GERROR_EVTQ_ABT_ERR)
+	if (active & GERROR_EVTQ_ABT_ERR)
 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
 
-	if (gerror & GERROR_CMDQ_ERR)
+	if (active & GERROR_CMDQ_ERR)
 		arm_smmu_cmdq_skip_err(smmu);
 
 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
@@ -1335,7 +1346,7 @@ static void arm_smmu_tlb_inv_context(void *cookie)
 }
 
 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
-					  bool leaf, void *cookie)
+					  size_t granule, bool leaf, void *cookie)
 {
 	struct arm_smmu_domain *smmu_domain = cookie;
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
@@ -1354,7 +1365,10 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
 	}
 
-	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+	do {
+		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+		cmd.tlbi.addr += granule;
+	} while (size -= granule);
 }
 
 static struct iommu_gather_ops arm_smmu_gather_ops = {
@@ -1429,10 +1443,10 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
 
 		if (cfg->cdptr) {
-			dma_free_coherent(smmu_domain->smmu->dev,
-					  CTXDESC_CD_DWORDS << 3,
-					  cfg->cdptr,
-					  cfg->cdptr_dma);
+			dmam_free_coherent(smmu_domain->smmu->dev,
+					   CTXDESC_CD_DWORDS << 3,
+					   cfg->cdptr,
+					   cfg->cdptr_dma);
 
 			arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
 		}
@@ -1457,8 +1471,9 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
 	if (IS_ERR_VALUE(asid))
 		return asid;
 
-	cfg->cdptr = dma_zalloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
-					 &cfg->cdptr_dma, GFP_KERNEL);
+	cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
+					 &cfg->cdptr_dma,
+					 GFP_KERNEL | __GFP_ZERO);
 	if (!cfg->cdptr) {
 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
 		ret = -ENOMEM;
@@ -1804,13 +1819,13 @@ static int arm_smmu_add_device(struct device *dev)
 		smmu = arm_smmu_get_for_pci_dev(pdev);
 		if (!smmu) {
 			ret = -ENOENT;
-			goto out_put_group;
+			goto out_remove_dev;
 		}
 
 		smmu_group = kzalloc(sizeof(*smmu_group), GFP_KERNEL);
 		if (!smmu_group) {
 			ret = -ENOMEM;
-			goto out_put_group;
+			goto out_remove_dev;
 		}
 
 		smmu_group->ste.valid	= true;
@@ -1826,20 +1841,20 @@ static int arm_smmu_add_device(struct device *dev)
 	for (i = 0; i < smmu_group->num_sids; ++i) {
 		/* If we already know about this SID, then we're done */
 		if (smmu_group->sids[i] == sid)
-			return 0;
+			goto out_put_group;
 	}
 
 	/* Check the SID is in range of the SMMU and our stream table */
 	if (!arm_smmu_sid_in_range(smmu, sid)) {
 		ret = -ERANGE;
-		goto out_put_group;
+		goto out_remove_dev;
 	}
 
 	/* Ensure l2 strtab is initialised */
 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
 		ret = arm_smmu_init_l2_strtab(smmu, sid);
 		if (ret)
-			goto out_put_group;
+			goto out_remove_dev;
 	}
 
 	/* Resize the SID array for the group */
@@ -1849,16 +1864,20 @@ static int arm_smmu_add_device(struct device *dev)
 	if (!sids) {
 		smmu_group->num_sids--;
 		ret = -ENOMEM;
-		goto out_put_group;
+		goto out_remove_dev;
 	}
 
 	/* Add the new SID */
 	sids[smmu_group->num_sids - 1] = sid;
 	smmu_group->sids = sids;
-	return 0;
 
 out_put_group:
 	iommu_group_put(group);
+	return 0;
+
+out_remove_dev:
+	iommu_group_remove_device(dev);
+	iommu_group_put(group);
 	return ret;
 }
 
@@ -1937,7 +1956,7 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
 {
 	size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
 
-	q->base = dma_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
+	q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
 	if (!q->base) {
 		dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
 			qsz);
@@ -1957,23 +1976,6 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
 	return 0;
 }
 
-static void arm_smmu_free_one_queue(struct arm_smmu_device *smmu,
-				    struct arm_smmu_queue *q)
-{
-	size_t qsz = ((1 << q->max_n_shift) * q->ent_dwords) << 3;
-
-	dma_free_coherent(smmu->dev, qsz, q->base, q->base_dma);
-}
-
-static void arm_smmu_free_queues(struct arm_smmu_device *smmu)
-{
-	arm_smmu_free_one_queue(smmu, &smmu->cmdq.q);
-	arm_smmu_free_one_queue(smmu, &smmu->evtq.q);
-
-	if (smmu->features & ARM_SMMU_FEAT_PRI)
-		arm_smmu_free_one_queue(smmu, &smmu->priq.q);
-}
-
 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
 {
 	int ret;
@@ -1983,49 +1985,20 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
 				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
 	if (ret)
-		goto out;
+		return ret;
 
 	/* evtq */
 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
 				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
 	if (ret)
-		goto out_free_cmdq;
+		return ret;
 
 	/* priq */
 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
 		return 0;
 
-	ret = arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
-				      ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
-	if (ret)
-		goto out_free_evtq;
-
-	return 0;
-
-out_free_evtq:
-	arm_smmu_free_one_queue(smmu, &smmu->evtq.q);
-out_free_cmdq:
-	arm_smmu_free_one_queue(smmu, &smmu->cmdq.q);
-out:
-	return ret;
-}
-
-static void arm_smmu_free_l2_strtab(struct arm_smmu_device *smmu)
-{
-	int i;
-	size_t size;
-	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
-
-	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
-	for (i = 0; i < cfg->num_l1_ents; ++i) {
-		struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[i];
-
-		if (!desc->l2ptr)
-			continue;
-
-		dma_free_coherent(smmu->dev, size, desc->l2ptr,
-				  desc->l2ptr_dma);
-	}
+	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
+				       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
 }
 
 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
@@ -2054,7 +2027,6 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
 	void *strtab;
 	u64 reg;
 	u32 size, l1size;
-	int ret;
 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
 
 	/*
@@ -2077,8 +2049,8 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
 			 size, smmu->sid_bits);
 
 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
-	strtab = dma_zalloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
-				     GFP_KERNEL);
+	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
+				     GFP_KERNEL | __GFP_ZERO);
 	if (!strtab) {
 		dev_err(smmu->dev,
 			"failed to allocate l1 stream table (%u bytes)\n",
@@ -2095,13 +2067,7 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
 		<< STRTAB_BASE_CFG_SPLIT_SHIFT;
 	cfg->strtab_base_cfg = reg;
 
-	ret = arm_smmu_init_l1_strtab(smmu);
-	if (ret)
-		dma_free_coherent(smmu->dev,
-				  l1size,
-				  strtab,
-				  cfg->strtab_dma);
-	return ret;
+	return arm_smmu_init_l1_strtab(smmu);
 }
 
 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
@@ -2112,8 +2078,8 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
 
 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
-	strtab = dma_zalloc_coherent(smmu->dev, size, &cfg->strtab_dma,
-				     GFP_KERNEL);
+	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
+				     GFP_KERNEL | __GFP_ZERO);
 	if (!strtab) {
 		dev_err(smmu->dev,
 			"failed to allocate linear stream table (%u bytes)\n",
@@ -2157,21 +2123,6 @@ static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
 	return 0;
 }
 
-static void arm_smmu_free_strtab(struct arm_smmu_device *smmu)
-{
-	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
-	u32 size = cfg->num_l1_ents;
-
-	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
-		arm_smmu_free_l2_strtab(smmu);
-		size *= STRTAB_L1_DESC_DWORDS << 3;
-	} else {
-		size *= STRTAB_STE_DWORDS * 3;
-	}
-
-	dma_free_coherent(smmu->dev, size, cfg->strtab, cfg->strtab_dma);
-}
-
 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
 {
 	int ret;
@@ -2180,21 +2131,7 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
 	if (ret)
 		return ret;
 
-	ret = arm_smmu_init_strtab(smmu);
-	if (ret)
-		goto out_free_queues;
-
-	return 0;
-
-out_free_queues:
-	arm_smmu_free_queues(smmu);
-	return ret;
-}
-
-static void arm_smmu_free_structures(struct arm_smmu_device *smmu)
-{
-	arm_smmu_free_strtab(smmu);
-	arm_smmu_free_queues(smmu);
+	return arm_smmu_init_strtab(smmu);
 }
 
 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
@@ -2532,8 +2469,12 @@ static int arm_smmu_device_probe(struct arm_smmu_device *smmu)
 		dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n",
 			 coherent ? "true" : "false");
 
-	if (reg & IDR0_STALL_MODEL)
+	switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) {
+	case IDR0_STALL_MODEL_STALL:
+		/* Fallthrough */
+	case IDR0_STALL_MODEL_FORCE:
 		smmu->features |= ARM_SMMU_FEAT_STALLS;
+	}
 
 	if (reg & IDR0_S1P)
 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
@@ -2699,15 +2640,7 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, smmu);
 
 	/* Reset the device */
-	ret = arm_smmu_device_reset(smmu);
-	if (ret)
-		goto out_free_structures;
-
-	return 0;
-
-out_free_structures:
-	arm_smmu_free_structures(smmu);
-	return ret;
+	return arm_smmu_device_reset(smmu);
 }
 
 static int arm_smmu_device_remove(struct platform_device *pdev)
@@ -2715,7 +2648,6 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
 
 	arm_smmu_device_disable(smmu);
-	arm_smmu_free_structures(smmu);
 	return 0;
 }
 
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 47dc7a793f5c..59ee4b8a3236 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -582,7 +582,7 @@ static void arm_smmu_tlb_inv_context(void *cookie)
 }
 
 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
-					  bool leaf, void *cookie)
+					  size_t granule, bool leaf, void *cookie)
 {
 	struct arm_smmu_domain *smmu_domain = cookie;
 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
@@ -597,12 +597,18 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 		if (!IS_ENABLED(CONFIG_64BIT) || smmu->version == ARM_SMMU_V1) {
 			iova &= ~12UL;
 			iova |= ARM_SMMU_CB_ASID(cfg);
-			writel_relaxed(iova, reg);
+			do {
+				writel_relaxed(iova, reg);
+				iova += granule;
+			} while (size -= granule);
 #ifdef CONFIG_64BIT
 		} else {
 			iova >>= 12;
 			iova |= (u64)ARM_SMMU_CB_ASID(cfg) << 48;
-			writeq_relaxed(iova, reg);
+			do {
+				writeq_relaxed(iova, reg);
+				iova += granule >> 12;
+			} while (size -= granule);
 #endif
 		}
 #ifdef CONFIG_64BIT
@@ -610,7 +616,11 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 		reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
 		reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
 			      ARM_SMMU_CB_S2_TLBIIPAS2;
-		writeq_relaxed(iova >> 12, reg);
+		iova >>= 12;
+		do {
+			writeq_relaxed(iova, reg);
+			iova += granule >> 12;
+		} while (size -= granule);
 #endif
 	} else {
 		reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID;
@@ -945,9 +955,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
 		free_irq(irq, domain);
 	}
 
-	if (smmu_domain->pgtbl_ops)
-		free_io_pgtable_ops(smmu_domain->pgtbl_ops);
-
+	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
 }
 
@@ -1357,6 +1365,7 @@ static int arm_smmu_add_device(struct device *dev)
 	if (IS_ERR(group))
 		return PTR_ERR(group);
 
+	iommu_group_put(group);
 	return 0;
 }
 
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 3a20db4f8604..72d6182666cb 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -21,10 +21,13 @@
 
 #include <linux/device.h>
 #include <linux/dma-iommu.h>
+#include <linux/gfp.h>
 #include <linux/huge_mm.h>
 #include <linux/iommu.h>
 #include <linux/iova.h>
 #include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/vmalloc.h>
 
 int iommu_dma_init(void)
 {
@@ -191,6 +194,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
 {
 	struct page **pages;
 	unsigned int i = 0, array_size = count * sizeof(*pages);
+	unsigned int order = MAX_ORDER;
 
 	if (array_size <= PAGE_SIZE)
 		pages = kzalloc(array_size, GFP_KERNEL);
@@ -204,14 +208,15 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
 
 	while (count) {
 		struct page *page = NULL;
-		int j, order = __fls(count);
+		int j;
 
 		/*
 		 * Higher-order allocations are a convenience rather
 		 * than a necessity, hence using __GFP_NORETRY until
 		 * falling back to single-page allocations.
 		 */
-		for (order = min(order, MAX_ORDER); order > 0; order--) {
+		for (order = min_t(unsigned int, order, __fls(count));
+		     order > 0; order--) {
 			page = alloc_pages(gfp | __GFP_NORETRY, order);
 			if (!page)
 				continue;
@@ -453,7 +458,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
 		size_t s_offset = iova_offset(iovad, s->offset);
 		size_t s_length = s->length;
 
-		sg_dma_address(s) = s->offset;
+		sg_dma_address(s) = s_offset;
 		sg_dma_len(s) = s_length;
 		s->offset -= s_offset;
 		s_length = iova_align(iovad, s_length + s_offset);
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 7df97777662d..8bbcbfe7695c 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -38,9 +38,6 @@
 #define io_pgtable_to_data(x)						\
 	container_of((x), struct arm_lpae_io_pgtable, iop)
 
-#define io_pgtable_ops_to_pgtable(x)					\
-	container_of((x), struct io_pgtable, ops)
-
 #define io_pgtable_ops_to_data(x)					\
 	io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))
 
@@ -58,8 +55,10 @@
 	((((d)->levels - ((l) - ARM_LPAE_START_LVL(d) + 1))		\
 	  * (d)->bits_per_level) + (d)->pg_shift)
 
+#define ARM_LPAE_GRANULE(d)		(1UL << (d)->pg_shift)
+
 #define ARM_LPAE_PAGES_PER_PGD(d)					\
-	DIV_ROUND_UP((d)->pgd_size, 1UL << (d)->pg_shift)
+	DIV_ROUND_UP((d)->pgd_size, ARM_LPAE_GRANULE(d))
 
 /*
  * Calculate the index at level l used to map virtual address a using the
@@ -169,7 +168,7 @@
 /* IOPTE accessors */
 #define iopte_deref(pte,d)					\
 	(__va((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1)	\
-	& ~((1ULL << (d)->pg_shift) - 1)))
+	& ~(ARM_LPAE_GRANULE(d) - 1ULL)))
 
 #define iopte_type(pte,l)					\
 	(((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK)
@@ -326,7 +325,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
 	/* Grab a pointer to the next level */
 	pte = *ptep;
 	if (!pte) {
-		cptep = __arm_lpae_alloc_pages(1UL << data->pg_shift,
+		cptep = __arm_lpae_alloc_pages(ARM_LPAE_GRANULE(data),
 					       GFP_ATOMIC, cfg);
 		if (!cptep)
 			return -ENOMEM;
@@ -405,17 +404,18 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
 	arm_lpae_iopte *start, *end;
 	unsigned long table_size;
 
-	/* Only leaf entries at the last level */
-	if (lvl == ARM_LPAE_MAX_LEVELS - 1)
-		return;
-
 	if (lvl == ARM_LPAE_START_LVL(data))
 		table_size = data->pgd_size;
 	else
-		table_size = 1UL << data->pg_shift;
+		table_size = ARM_LPAE_GRANULE(data);
 
 	start = ptep;
-	end = (void *)ptep + table_size;
+
+	/* Only leaf entries at the last level */
+	if (lvl == ARM_LPAE_MAX_LEVELS - 1)
+		end = ptep;
+	else
+		end = (void *)ptep + table_size;
 
 	while (ptep != end) {
 		arm_lpae_iopte pte = *ptep++;
@@ -473,7 +473,7 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 
 	__arm_lpae_set_pte(ptep, table, cfg);
 	iova &= ~(blk_size - 1);
-	cfg->tlb->tlb_add_flush(iova, blk_size, true, data->iop.cookie);
+	cfg->tlb->tlb_add_flush(iova, blk_size, blk_size, true, data->iop.cookie);
 	return size;
 }
 
@@ -486,11 +486,13 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 	void *cookie = data->iop.cookie;
 	size_t blk_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
 
+	/* Something went horribly wrong and we ran out of page table */
+	if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
+		return 0;
+
 	ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
 	pte = *ptep;
-
-	/* Something went horribly wrong and we ran out of page table */
-	if (WARN_ON(!pte || (lvl == ARM_LPAE_MAX_LEVELS)))
+	if (WARN_ON(!pte))
 		return 0;
 
 	/* If the size matches this level, we're in the right place */
@@ -499,12 +501,13 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 
 		if (!iopte_leaf(pte, lvl)) {
 			/* Also flush any partial walks */
-			tlb->tlb_add_flush(iova, size, false, cookie);
+			tlb->tlb_add_flush(iova, size, ARM_LPAE_GRANULE(data),
+					   false, cookie);
 			tlb->tlb_sync(cookie);
 			ptep = iopte_deref(pte, data);
 			__arm_lpae_free_pgtable(data, lvl + 1, ptep);
 		} else {
-			tlb->tlb_add_flush(iova, size, true, cookie);
+			tlb->tlb_add_flush(iova, size, size, true, cookie);
 		}
 
 		return size;
@@ -570,7 +573,7 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
 	return 0;
 
 found_translation:
-	iova &= ((1 << data->pg_shift) - 1);
+	iova &= (ARM_LPAE_GRANULE(data) - 1);
 	return ((phys_addr_t)iopte_to_pfn(pte,data) << data->pg_shift) | iova;
 }
 
@@ -668,7 +671,7 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
 	      (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) |
 	      (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT);
 
-	switch (1 << data->pg_shift) {
+	switch (ARM_LPAE_GRANULE(data)) {
 	case SZ_4K:
 		reg |= ARM_LPAE_TCR_TG0_4K;
 		break;
@@ -769,7 +772,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
 
 	sl = ARM_LPAE_START_LVL(data);
 
-	switch (1 << data->pg_shift) {
+	switch (ARM_LPAE_GRANULE(data)) {
 	case SZ_4K:
 		reg |= ARM_LPAE_TCR_TG0_4K;
 		sl++; /* SL0 format is different for 4K granule size */
@@ -889,8 +892,8 @@ static void dummy_tlb_flush_all(void *cookie)
 	WARN_ON(cookie != cfg_cookie);
 }
 
-static void dummy_tlb_add_flush(unsigned long iova, size_t size, bool leaf,
-				void *cookie)
+static void dummy_tlb_add_flush(unsigned long iova, size_t size,
+				size_t granule, bool leaf, void *cookie)
 {
 	WARN_ON(cookie != cfg_cookie);
 	WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index ac9e2341a633..36673c83de58 100644
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h
@@ -26,8 +26,8 @@ enum io_pgtable_fmt {
  */
 struct iommu_gather_ops {
 	void (*tlb_flush_all)(void *cookie);
-	void (*tlb_add_flush)(unsigned long iova, size_t size, bool leaf,
-			      void *cookie);
+	void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule,
+			      bool leaf, void *cookie);
 	void (*tlb_sync)(void *cookie);
 };
 
@@ -131,6 +131,8 @@ struct io_pgtable {
 	struct io_pgtable_ops	ops;
 };
 
+#define io_pgtable_ops_to_pgtable(x) container_of((x), struct io_pgtable, ops)
+
 /**
  * struct io_pgtable_init_fns - Alloc/free a set of page tables for a
  *                              particular format.
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 8cf605fa9946..2fdbac67a77f 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -277,8 +277,8 @@ static void ipmmu_tlb_flush_all(void *cookie)
 	ipmmu_tlb_invalidate(domain);
 }
 
-static void ipmmu_tlb_add_flush(unsigned long iova, size_t size, bool leaf,
-				void *cookie)
+static void ipmmu_tlb_add_flush(unsigned long iova, size_t size,
+				size_t granule, bool leaf, void *cookie)
 {
 	/* The hardware doesn't support selective TLB flush. */
 }
@@ -295,7 +295,7 @@ static struct iommu_gather_ops ipmmu_gather_ops = {
 
 static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
 {
-	phys_addr_t ttbr;
+	u64 ttbr;
 
 	/*
 	 * Allocate the page table operations.
diff --git a/drivers/iommu/msm_iommu_dev.c b/drivers/iommu/msm_iommu_dev.c
index b6d01f97e537..4b09e815accf 100644
--- a/drivers/iommu/msm_iommu_dev.c
+++ b/drivers/iommu/msm_iommu_dev.c
@@ -359,30 +359,19 @@ static struct platform_driver msm_iommu_ctx_driver = {
 	.remove		= msm_iommu_ctx_remove,
 };
 
+static struct platform_driver * const drivers[] = {
+	&msm_iommu_driver,
+	&msm_iommu_ctx_driver,
+};
+
 static int __init msm_iommu_driver_init(void)
 {
-	int ret;
-	ret = platform_driver_register(&msm_iommu_driver);
-	if (ret != 0) {
-		pr_err("Failed to register IOMMU driver\n");
-		goto error;
-	}
-
-	ret = platform_driver_register(&msm_iommu_ctx_driver);
-	if (ret != 0) {
-		platform_driver_unregister(&msm_iommu_driver);
-		pr_err("Failed to register IOMMU context driver\n");
-		goto error;
-	}
-
-error:
-	return ret;
+	return platform_register_drivers(drivers, ARRAY_SIZE(drivers));
 }
 
 static void __exit msm_iommu_driver_exit(void)
 {
-	platform_driver_unregister(&msm_iommu_ctx_driver);
-	platform_driver_unregister(&msm_iommu_driver);
+	platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
 }
 
 subsys_initcall(msm_iommu_driver_init);
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 471ee36b9c6e..a04d491cf431 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -49,7 +49,7 @@ static bool s390_iommu_capable(enum iommu_cap cap)
 	}
 }
 
-struct iommu_domain *s390_domain_alloc(unsigned domain_type)
+static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
 {
 	struct s390_domain *s390_domain;
 
@@ -73,7 +73,7 @@ struct iommu_domain *s390_domain_alloc(unsigned domain_type)
 	return &s390_domain->domain;
 }
 
-void s390_domain_free(struct iommu_domain *domain)
+static void s390_domain_free(struct iommu_domain *domain)
 {
 	struct s390_domain *s390_domain = to_s390_domain(domain);
 
diff --git a/drivers/iommu/shmobile-iommu.c b/drivers/iommu/shmobile-iommu.c
deleted file mode 100644
index a0287519a1d4..000000000000
--- a/drivers/iommu/shmobile-iommu.c
+++ /dev/null
@@ -1,402 +0,0 @@
-/*
- * IOMMU for IPMMU/IPMMUI
- * Copyright (C) 2012  Hideki EIRAKU
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- */
-
-#include <linux/dma-mapping.h>
-#include <linux/io.h>
-#include <linux/iommu.h>
-#include <linux/platform_device.h>
-#include <linux/sizes.h>
-#include <linux/slab.h>
-#include <asm/dma-iommu.h>
-#include "shmobile-ipmmu.h"
-
-#define L1_SIZE CONFIG_SHMOBILE_IOMMU_L1SIZE
-#define L1_LEN (L1_SIZE / 4)
-#define L1_ALIGN L1_SIZE
-#define L2_SIZE SZ_1K
-#define L2_LEN (L2_SIZE / 4)
-#define L2_ALIGN L2_SIZE
-
-struct shmobile_iommu_domain_pgtable {
-	uint32_t *pgtable;
-	dma_addr_t handle;
-};
-
-struct shmobile_iommu_archdata {
-	struct list_head attached_list;
-	struct dma_iommu_mapping *iommu_mapping;
-	spinlock_t attach_lock;
-	struct shmobile_iommu_domain *attached;
-	int num_attached_devices;
-	struct shmobile_ipmmu *ipmmu;
-};
-
-struct shmobile_iommu_domain {
-	struct shmobile_iommu_domain_pgtable l1, l2[L1_LEN];
-	spinlock_t map_lock;
-	spinlock_t attached_list_lock;
-	struct list_head attached_list;
-	struct iommu_domain domain;
-};
-
-static struct shmobile_iommu_archdata *ipmmu_archdata;
-static struct kmem_cache *l1cache, *l2cache;
-
-static struct shmobile_iommu_domain *to_sh_domain(struct iommu_domain *dom)
-{
-	return container_of(dom, struct shmobile_iommu_domain, domain);
-}
-
-static int pgtable_alloc(struct shmobile_iommu_domain_pgtable *pgtable,
-			 struct kmem_cache *cache, size_t size)
-{
-	pgtable->pgtable = kmem_cache_zalloc(cache, GFP_ATOMIC);
-	if (!pgtable->pgtable)
-		return -ENOMEM;
-	pgtable->handle = dma_map_single(NULL, pgtable->pgtable, size,
-					 DMA_TO_DEVICE);
-	return 0;
-}
-
-static void pgtable_free(struct shmobile_iommu_domain_pgtable *pgtable,
-			 struct kmem_cache *cache, size_t size)
-{
-	dma_unmap_single(NULL, pgtable->handle, size, DMA_TO_DEVICE);
-	kmem_cache_free(cache, pgtable->pgtable);
-}
-
-static uint32_t pgtable_read(struct shmobile_iommu_domain_pgtable *pgtable,
-			     unsigned int index)
-{
-	return pgtable->pgtable[index];
-}
-
-static void pgtable_write(struct shmobile_iommu_domain_pgtable *pgtable,
-			  unsigned int index, unsigned int count, uint32_t val)
-{
-	unsigned int i;
-
-	for (i = 0; i < count; i++)
-		pgtable->pgtable[index + i] = val;
-	dma_sync_single_for_device(NULL, pgtable->handle + index * sizeof(val),
-				   sizeof(val) * count, DMA_TO_DEVICE);
-}
-
-static struct iommu_domain *shmobile_iommu_domain_alloc(unsigned type)
-{
-	struct shmobile_iommu_domain *sh_domain;
-	int i, ret;
-
-	if (type != IOMMU_DOMAIN_UNMANAGED)
-		return NULL;
-
-	sh_domain = kzalloc(sizeof(*sh_domain), GFP_KERNEL);
-	if (!sh_domain)
-		return NULL;
-	ret = pgtable_alloc(&sh_domain->l1, l1cache, L1_SIZE);
-	if (ret < 0) {
-		kfree(sh_domain);
-		return NULL;
-	}
-	for (i = 0; i < L1_LEN; i++)
-		sh_domain->l2[i].pgtable = NULL;
-	spin_lock_init(&sh_domain->map_lock);
-	spin_lock_init(&sh_domain->attached_list_lock);
-	INIT_LIST_HEAD(&sh_domain->attached_list);
-	return &sh_domain->domain;
-}
-
-static void shmobile_iommu_domain_free(struct iommu_domain *domain)
-{
-	struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain);
-	int i;
-
-	for (i = 0; i < L1_LEN; i++) {
-		if (sh_domain->l2[i].pgtable)
-			pgtable_free(&sh_domain->l2[i], l2cache, L2_SIZE);
-	}
-	pgtable_free(&sh_domain->l1, l1cache, L1_SIZE);
-	kfree(sh_domain);
-}
-
-static int shmobile_iommu_attach_device(struct iommu_domain *domain,
-					struct device *dev)
-{
-	struct shmobile_iommu_archdata *archdata = dev->archdata.iommu;
-	struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain);
-	int ret = -EBUSY;
-
-	if (!archdata)
-		return -ENODEV;
-	spin_lock(&sh_domain->attached_list_lock);
-	spin_lock(&archdata->attach_lock);
-	if (archdata->attached != sh_domain) {
-		if (archdata->attached)
-			goto err;
-		ipmmu_tlb_set(archdata->ipmmu, sh_domain->l1.handle, L1_SIZE,
-			      0);
-		ipmmu_tlb_flush(archdata->ipmmu);
-		archdata->attached = sh_domain;
-		archdata->num_attached_devices = 0;
-		list_add(&archdata->attached_list, &sh_domain->attached_list);
-	}
-	archdata->num_attached_devices++;
-	ret = 0;
-err:
-	spin_unlock(&archdata->attach_lock);
-	spin_unlock(&sh_domain->attached_list_lock);
-	return ret;
-}
-
-static void shmobile_iommu_detach_device(struct iommu_domain *domain,
-					 struct device *dev)
-{
-	struct shmobile_iommu_archdata *archdata = dev->archdata.iommu;
-	struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain);
-
-	if (!archdata)
-		return;
-	spin_lock(&sh_domain->attached_list_lock);
-	spin_lock(&archdata->attach_lock);
-	archdata->num_attached_devices--;
-	if (!archdata->num_attached_devices) {
-		ipmmu_tlb_set(archdata->ipmmu, 0, 0, 0);
-		ipmmu_tlb_flush(archdata->ipmmu);
-		archdata->attached = NULL;
-		list_del(&archdata->attached_list);
-	}
-	spin_unlock(&archdata->attach_lock);
-	spin_unlock(&sh_domain->attached_list_lock);
-}
-
-static void domain_tlb_flush(struct shmobile_iommu_domain *sh_domain)
-{
-	struct shmobile_iommu_archdata *archdata;
-
-	spin_lock(&sh_domain->attached_list_lock);
-	list_for_each_entry(archdata, &sh_domain->attached_list, attached_list)
-		ipmmu_tlb_flush(archdata->ipmmu);
-	spin_unlock(&sh_domain->attached_list_lock);
-}
-
-static int l2alloc(struct shmobile_iommu_domain *sh_domain,
-		   unsigned int l1index)
-{
-	int ret;
-
-	if (!sh_domain->l2[l1index].pgtable) {
-		ret = pgtable_alloc(&sh_domain->l2[l1index], l2cache, L2_SIZE);
-		if (ret < 0)
-			return ret;
-	}
-	pgtable_write(&sh_domain->l1, l1index, 1,
-		      sh_domain->l2[l1index].handle | 0x1);
-	return 0;
-}
-
-static void l2realfree(struct shmobile_iommu_domain_pgtable *l2)
-{
-	if (l2->pgtable)
-		pgtable_free(l2, l2cache, L2_SIZE);
-}
-
-static void l2free(struct shmobile_iommu_domain *sh_domain,
-		   unsigned int l1index,
-		   struct shmobile_iommu_domain_pgtable *l2)
-{
-	pgtable_write(&sh_domain->l1, l1index, 1, 0);
-	if (sh_domain->l2[l1index].pgtable) {
-		*l2 = sh_domain->l2[l1index];
-		sh_domain->l2[l1index].pgtable = NULL;
-	}
-}
-
-static int shmobile_iommu_map(struct iommu_domain *domain, unsigned long iova,
-			      phys_addr_t paddr, size_t size, int prot)
-{
-	struct shmobile_iommu_domain_pgtable l2 = { .pgtable = NULL };
-	struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain);
-	unsigned int l1index, l2index;
-	int ret;
-
-	l1index = iova >> 20;
-	switch (size) {
-	case SZ_4K:
-		l2index = (iova >> 12) & 0xff;
-		spin_lock(&sh_domain->map_lock);
-		ret = l2alloc(sh_domain, l1index);
-		if (!ret)
-			pgtable_write(&sh_domain->l2[l1index], l2index, 1,
-				      paddr | 0xff2);
-		spin_unlock(&sh_domain->map_lock);
-		break;
-	case SZ_64K:
-		l2index = (iova >> 12) & 0xf0;
-		spin_lock(&sh_domain->map_lock);
-		ret = l2alloc(sh_domain, l1index);
-		if (!ret)
-			pgtable_write(&sh_domain->l2[l1index], l2index, 0x10,
-				      paddr | 0xff1);
-		spin_unlock(&sh_domain->map_lock);
-		break;
-	case SZ_1M:
-		spin_lock(&sh_domain->map_lock);
-		l2free(sh_domain, l1index, &l2);
-		pgtable_write(&sh_domain->l1, l1index, 1, paddr | 0xc02);
-		spin_unlock(&sh_domain->map_lock);
-		ret = 0;
-		break;
-	default:
-		ret = -EINVAL;
-	}
-	if (!ret)
-		domain_tlb_flush(sh_domain);
-	l2realfree(&l2);
-	return ret;
-}
-
-static size_t shmobile_iommu_unmap(struct iommu_domain *domain,
-				   unsigned long iova, size_t size)
-{
-	struct shmobile_iommu_domain_pgtable l2 = { .pgtable = NULL };
-	struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain);
-	unsigned int l1index, l2index;
-	uint32_t l2entry = 0;
-	size_t ret = 0;
-
-	l1index = iova >> 20;
-	if (!(iova & 0xfffff) && size >= SZ_1M) {
-		spin_lock(&sh_domain->map_lock);
-		l2free(sh_domain, l1index, &l2);
-		spin_unlock(&sh_domain->map_lock);
-		ret = SZ_1M;
-		goto done;
-	}
-	l2index = (iova >> 12) & 0xff;
-	spin_lock(&sh_domain->map_lock);
-	if (sh_domain->l2[l1index].pgtable)
-		l2entry = pgtable_read(&sh_domain->l2[l1index], l2index);
-	switch (l2entry & 3) {
-	case 1:
-		if (l2index & 0xf)
-			break;
-		pgtable_write(&sh_domain->l2[l1index], l2index, 0x10, 0);
-		ret = SZ_64K;
-		break;
-	case 2:
-		pgtable_write(&sh_domain->l2[l1index], l2index, 1, 0);
-		ret = SZ_4K;
-		break;
-	}
-	spin_unlock(&sh_domain->map_lock);
-done:
-	if (ret)
-		domain_tlb_flush(sh_domain);
-	l2realfree(&l2);
-	return ret;
-}
-
-static phys_addr_t shmobile_iommu_iova_to_phys(struct iommu_domain *domain,
-					       dma_addr_t iova)
-{
-	struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain);
-	uint32_t l1entry = 0, l2entry = 0;
-	unsigned int l1index, l2index;
-
-	l1index = iova >> 20;
-	l2index = (iova >> 12) & 0xff;
-	spin_lock(&sh_domain->map_lock);
-	if (sh_domain->l2[l1index].pgtable)
-		l2entry = pgtable_read(&sh_domain->l2[l1index], l2index);
-	else
-		l1entry = pgtable_read(&sh_domain->l1, l1index);
-	spin_unlock(&sh_domain->map_lock);
-	switch (l2entry & 3) {
-	case 1:
-		return (l2entry & ~0xffff) | (iova & 0xffff);
-	case 2:
-		return (l2entry & ~0xfff) | (iova & 0xfff);
-	default:
-		if ((l1entry & 3) == 2)
-			return (l1entry & ~0xfffff) | (iova & 0xfffff);
-		return 0;
-	}
-}
-
-static int find_dev_name(struct shmobile_ipmmu *ipmmu, const char *dev_name)
-{
-	unsigned int i, n = ipmmu->num_dev_names;
-
-	for (i = 0; i < n; i++) {
-		if (strcmp(ipmmu->dev_names[i], dev_name) == 0)
-			return 1;
-	}
-	return 0;
-}
-
-static int shmobile_iommu_add_device(struct device *dev)
-{
-	struct shmobile_iommu_archdata *archdata = ipmmu_archdata;
-	struct dma_iommu_mapping *mapping;
-
-	if (!find_dev_name(archdata->ipmmu, dev_name(dev)))
-		return 0;
-	mapping = archdata->iommu_mapping;
-	if (!mapping) {
-		mapping = arm_iommu_create_mapping(&platform_bus_type, 0,
-						   L1_LEN << 20);
-		if (IS_ERR(mapping))
-			return PTR_ERR(mapping);
-		archdata->iommu_mapping = mapping;
-	}
-	dev->archdata.iommu = archdata;
-	if (arm_iommu_attach_device(dev, mapping))
-		pr_err("arm_iommu_attach_device failed\n");
-	return 0;
-}
-
-static const struct iommu_ops shmobile_iommu_ops = {
-	.domain_alloc = shmobile_iommu_domain_alloc,
-	.domain_free = shmobile_iommu_domain_free,
-	.attach_dev = shmobile_iommu_attach_device,
-	.detach_dev = shmobile_iommu_detach_device,
-	.map = shmobile_iommu_map,
-	.unmap = shmobile_iommu_unmap,
-	.map_sg = default_iommu_map_sg,
-	.iova_to_phys = shmobile_iommu_iova_to_phys,
-	.add_device = shmobile_iommu_add_device,
-	.pgsize_bitmap = SZ_1M | SZ_64K | SZ_4K,
-};
-
-int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu)
-{
-	static struct shmobile_iommu_archdata *archdata;
-
-	l1cache = kmem_cache_create("shmobile-iommu-pgtable1", L1_SIZE,
-				    L1_ALIGN, SLAB_HWCACHE_ALIGN, NULL);
-	if (!l1cache)
-		return -ENOMEM;
-	l2cache = kmem_cache_create("shmobile-iommu-pgtable2", L2_SIZE,
-				    L2_ALIGN, SLAB_HWCACHE_ALIGN, NULL);
-	if (!l2cache) {
-		kmem_cache_destroy(l1cache);
-		return -ENOMEM;
-	}
-	archdata = kzalloc(sizeof(*archdata), GFP_KERNEL);
-	if (!archdata) {
-		kmem_cache_destroy(l1cache);
-		kmem_cache_destroy(l2cache);
-		return -ENOMEM;
-	}
-	spin_lock_init(&archdata->attach_lock);
-	archdata->ipmmu = ipmmu;
-	ipmmu_archdata = archdata;
-	bus_set_iommu(&platform_bus_type, &shmobile_iommu_ops);
-	return 0;
-}
diff --git a/drivers/iommu/shmobile-ipmmu.c b/drivers/iommu/shmobile-ipmmu.c
deleted file mode 100644
index 951651a9746b..000000000000
--- a/drivers/iommu/shmobile-ipmmu.c
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * IPMMU/IPMMUI
- * Copyright (C) 2012  Hideki EIRAKU
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- */
-
-#include <linux/err.h>
-#include <linux/export.h>
-#include <linux/io.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/platform_data/sh_ipmmu.h>
-#include "shmobile-ipmmu.h"
-
-#define IMCTR1 0x000
-#define IMCTR2 0x004
-#define IMASID 0x010
-#define IMTTBR 0x014
-#define IMTTBCR 0x018
-
-#define IMCTR1_TLBEN (1 << 0)
-#define IMCTR1_FLUSH (1 << 1)
-
-static void ipmmu_reg_write(struct shmobile_ipmmu *ipmmu, unsigned long reg_off,
-			    unsigned long data)
-{
-	iowrite32(data, ipmmu->ipmmu_base + reg_off);
-}
-
-void ipmmu_tlb_flush(struct shmobile_ipmmu *ipmmu)
-{
-	if (!ipmmu)
-		return;
-
-	spin_lock(&ipmmu->flush_lock);
-	if (ipmmu->tlb_enabled)
-		ipmmu_reg_write(ipmmu, IMCTR1, IMCTR1_FLUSH | IMCTR1_TLBEN);
-	else
-		ipmmu_reg_write(ipmmu, IMCTR1, IMCTR1_FLUSH);
-	spin_unlock(&ipmmu->flush_lock);
-}
-
-void ipmmu_tlb_set(struct shmobile_ipmmu *ipmmu, unsigned long phys, int size,
-		   int asid)
-{
-	if (!ipmmu)
-		return;
-
-	spin_lock(&ipmmu->flush_lock);
-	switch (size) {
-	default:
-		ipmmu->tlb_enabled = 0;
-		break;
-	case 0x2000:
-		ipmmu_reg_write(ipmmu, IMTTBCR, 1);
-		ipmmu->tlb_enabled = 1;
-		break;
-	case 0x1000:
-		ipmmu_reg_write(ipmmu, IMTTBCR, 2);
-		ipmmu->tlb_enabled = 1;
-		break;
-	case 0x800:
-		ipmmu_reg_write(ipmmu, IMTTBCR, 3);
-		ipmmu->tlb_enabled = 1;
-		break;
-	case 0x400:
-		ipmmu_reg_write(ipmmu, IMTTBCR, 4);
-		ipmmu->tlb_enabled = 1;
-		break;
-	case 0x200:
-		ipmmu_reg_write(ipmmu, IMTTBCR, 5);
-		ipmmu->tlb_enabled = 1;
-		break;
-	case 0x100:
-		ipmmu_reg_write(ipmmu, IMTTBCR, 6);
-		ipmmu->tlb_enabled = 1;
-		break;
-	case 0x80:
-		ipmmu_reg_write(ipmmu, IMTTBCR, 7);
-		ipmmu->tlb_enabled = 1;
-		break;
-	}
-	ipmmu_reg_write(ipmmu, IMTTBR, phys);
-	ipmmu_reg_write(ipmmu, IMASID, asid);
-	spin_unlock(&ipmmu->flush_lock);
-}
-
-static int ipmmu_probe(struct platform_device *pdev)
-{
-	struct shmobile_ipmmu *ipmmu;
-	struct resource *res;
-	struct shmobile_ipmmu_platform_data *pdata = pdev->dev.platform_data;
-
-	ipmmu = devm_kzalloc(&pdev->dev, sizeof(*ipmmu), GFP_KERNEL);
-	if (!ipmmu) {
-		dev_err(&pdev->dev, "cannot allocate device data\n");
-		return -ENOMEM;
-	}
-	spin_lock_init(&ipmmu->flush_lock);
-	ipmmu->dev = &pdev->dev;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	ipmmu->ipmmu_base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(ipmmu->ipmmu_base))
-		return PTR_ERR(ipmmu->ipmmu_base);
-
-	ipmmu->dev_names = pdata->dev_names;
-	ipmmu->num_dev_names = pdata->num_dev_names;
-	platform_set_drvdata(pdev, ipmmu);
-	ipmmu_reg_write(ipmmu, IMCTR1, 0x0); /* disable TLB */
-	ipmmu_reg_write(ipmmu, IMCTR2, 0x0); /* disable PMB */
-	return ipmmu_iommu_init(ipmmu);
-}
-
-static struct platform_driver ipmmu_driver = {
-	.probe = ipmmu_probe,
-	.driver = {
-		.name = "ipmmu",
-	},
-};
-
-static int __init ipmmu_init(void)
-{
-	return platform_driver_register(&ipmmu_driver);
-}
-subsys_initcall(ipmmu_init);
diff --git a/drivers/iommu/shmobile-ipmmu.h b/drivers/iommu/shmobile-ipmmu.h
deleted file mode 100644
index 9524743ca1fb..000000000000
--- a/drivers/iommu/shmobile-ipmmu.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* shmobile-ipmmu.h
- *
- * Copyright (C) 2012  Hideki EIRAKU
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- */
-
-#ifndef __SHMOBILE_IPMMU_H__
-#define __SHMOBILE_IPMMU_H__
-
-struct shmobile_ipmmu {
-	struct device *dev;
-	void __iomem *ipmmu_base;
-	int tlb_enabled;
-	spinlock_t flush_lock;
-	const char * const *dev_names;
-	unsigned int num_dev_names;
-};
-
-#ifdef CONFIG_SHMOBILE_IPMMU_TLB
-void ipmmu_tlb_flush(struct shmobile_ipmmu *ipmmu);
-void ipmmu_tlb_set(struct shmobile_ipmmu *ipmmu, unsigned long phys, int size,
-		   int asid);
-int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu);
-#else
-static inline int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu)
-{
-	return -EINVAL;
-}
-#endif
-
-#endif /* __SHMOBILE_IPMMU_H__ */
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 95c13b2ffa79..ffa288474820 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -426,15 +426,6 @@ int add_mtd_device(struct mtd_info *mtd)
 	mtd->erasesize_mask = (1 << mtd->erasesize_shift) - 1;
 	mtd->writesize_mask = (1 << mtd->writesize_shift) - 1;
 
-	if (mtd->dev.parent) {
-		if (!mtd->owner && mtd->dev.parent->driver)
-			mtd->owner = mtd->dev.parent->driver->owner;
-		if (!mtd->name)
-			mtd->name = dev_name(mtd->dev.parent);
-	} else {
-		pr_debug("mtd device won't show a device symlink in sysfs\n");
-	}
-
 	/* Some chips always power up locked. Unlock them now */
 	if ((mtd->flags & MTD_WRITEABLE) && (mtd->flags & MTD_POWERUP_LOCK)) {
 		error = mtd_unlock(mtd, 0, mtd->size);
@@ -549,6 +540,21 @@ static int mtd_add_device_partitions(struct mtd_info *mtd,
 	return 0;
 }
 
+/*
+ * Set a few defaults based on the parent devices, if not provided by the
+ * driver
+ */
+static void mtd_set_dev_defaults(struct mtd_info *mtd)
+{
+	if (mtd->dev.parent) {
+		if (!mtd->owner && mtd->dev.parent->driver)
+			mtd->owner = mtd->dev.parent->driver->owner;
+		if (!mtd->name)
+			mtd->name = dev_name(mtd->dev.parent);
+	} else {
+		pr_debug("mtd device won't show a device symlink in sysfs\n");
+	}
+}
 
 /**
  * mtd_device_parse_register - parse partitions and register an MTD device.
@@ -587,6 +593,8 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types,
 	int ret;
 	struct mtd_partition *real_parts = NULL;
 
+	mtd_set_dev_defaults(mtd);
+
 	ret = parse_mtd_partitions(mtd, types, &real_parts, parser_data);
 	if (ret <= 0 && nr_parts && parts) {
 		real_parts = kmemdup(parts, sizeof(*parts) * nr_parts,
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 49883905a434..32477c4eb421 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -516,8 +516,8 @@ static int stm_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len)
 	status_old = read_sr(nor);
 
 	/* Cannot unlock; would unlock larger region than requested */
-	if (stm_is_locked_sr(nor, status_old, ofs - mtd->erasesize,
-			     mtd->erasesize))
+	if (stm_is_locked_sr(nor, ofs - mtd->erasesize, mtd->erasesize,
+			     status_old))
 		return -EINVAL;
 
 	/*
@@ -1200,8 +1200,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 
 	if (JEDEC_MFR(info) == SNOR_MFR_ATMEL ||
 	    JEDEC_MFR(info) == SNOR_MFR_INTEL ||
-	    JEDEC_MFR(info) == SNOR_MFR_SST ||
-	    JEDEC_MFR(info) == SNOR_MFR_WINBOND) {
+	    JEDEC_MFR(info) == SNOR_MFR_SST) {
 		write_enable(nor);
 		write_sr(nor, 0);
 	}
@@ -1217,8 +1216,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 	mtd->_read = spi_nor_read;
 
 	/* NOR protection support for STmicro/Micron chips and similar */
-	if (JEDEC_MFR(info) == SNOR_MFR_MICRON ||
-	    JEDEC_MFR(info) == SNOR_MFR_WINBOND) {
+	if (JEDEC_MFR(info) == SNOR_MFR_MICRON) {
 		nor->flash_lock = stm_lock;
 		nor->flash_unlock = stm_unlock;
 		nor->flash_is_locked = stm_is_locked;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
index c308429dd9c7..11dd91e4db56 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
@@ -295,6 +295,10 @@ struct clip_tbl *t4_init_clip_tbl(unsigned int clipt_start,
 		INIT_LIST_HEAD(&ctbl->hash_list[i]);
 
 	cl_list = t4_alloc_mem(clipt_size*sizeof(struct clip_entry));
+	if (!cl_list) {
+		t4_free_mem(ctbl);
+		return NULL;
+	}
 	ctbl->cl_list = (void *)cl_list;
 
 	for (i = 0; i < clipt_size; i++) {
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
index a5f422f26cb4..daf05155b732 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
@@ -772,8 +772,10 @@ int qlcnic_82xx_config_intrpt(struct qlcnic_adapter *adapter, u8 op_type)
 	int i, err = 0;
 
 	for (i = 0; i < ahw->num_msix; i++) {
-		qlcnic_alloc_mbx_args(&cmd, adapter,
-				      QLCNIC_CMD_MQ_TX_CONFIG_INTR);
+		err = qlcnic_alloc_mbx_args(&cmd, adapter,
+					    QLCNIC_CMD_MQ_TX_CONFIG_INTR);
+		if (err)
+			return err;
 		type = op_type ? QLCNIC_INTRPT_ADD : QLCNIC_INTRPT_DEL;
 		val = type | (ahw->intr_tbl[i].type << 4);
 		if (ahw->intr_tbl[i].type == QLCNIC_INTRPT_MSIX)
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 9f0b1c342b77..5a1e98547031 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -683,6 +683,12 @@ static void sixpack_close(struct tty_struct *tty)
 	if (!atomic_dec_and_test(&sp->refcnt))
 		down(&sp->dead_sem);
 
+	/* We must stop the queue to avoid potentially scribbling
+	 * on the free buffers. The sp->dead_sem is not sufficient
+	 * to protect us from sp->xbuff access.
+	 */
+	netif_stop_queue(sp->dev);
+
 	del_timer_sync(&sp->tx_t);
 	del_timer_sync(&sp->resync_t);
 
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index 0b72b9de5207..85828f153445 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -797,6 +797,11 @@ static void mkiss_close(struct tty_struct *tty)
 	 */
 	if (!atomic_dec_and_test(&ax->refcnt))
 		down(&ax->dead_sem);
+	/*
+	 * Halt the transmit queue so that a new transmit cannot scribble
+	 * on our buffers
+	 */
+	netif_stop_queue(ax->dev);
 
 	/* Free all AX25 frame buffers. */
 	kfree(ax->rbuff);
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 9a5be8b85186..5fccc5a8153f 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -742,6 +742,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x413c, 0x81a9, 8)},	/* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */
 	{QMI_FIXED_INTF(0x413c, 0x81b1, 8)},	/* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */
 	{QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)},	/* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
+	{QMI_FIXED_INTF(0x22de, 0x9061, 3)},	/* WeTelecom WPD-600N */
 
 	/* 4. Gobi 1000 devices */
 	{QMI_GOBI1K_DEVICE(0x05c6, 0x9212)},	/* Acer Gobi Modem Device */
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 2e32c41536ae..2fb637ad594a 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -3525,6 +3525,14 @@ static int rtl8152_resume(struct usb_interface *intf)
 	return 0;
 }
 
+static int rtl8152_reset_resume(struct usb_interface *intf)
+{
+	struct r8152 *tp = usb_get_intfdata(intf);
+
+	clear_bit(SELECTIVE_SUSPEND, &tp->flags);
+	return rtl8152_resume(intf);
+}
+
 static void rtl8152_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
 	struct r8152 *tp = netdev_priv(dev);
@@ -4276,7 +4284,7 @@ static struct usb_driver rtl8152_driver = {
 	.disconnect =	rtl8152_disconnect,
 	.suspend =	rtl8152_suspend,
 	.resume =	rtl8152_resume,
-	.reset_resume =	rtl8152_resume,
+	.reset_resume =	rtl8152_reset_resume,
 	.pre_reset =	rtl8152_pre_reset,
 	.post_reset =	rtl8152_post_reset,
 	.supports_autosuspend = 1,
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 417903715437..0cbf520cea77 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1380,10 +1380,10 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
 					skip_page_frags = true;
 					goto rcd_done;
 				}
-				new_dma_addr = dma_map_page(&adapter->pdev->dev
-							, rbi->page,
-							0, PAGE_SIZE,
-							PCI_DMA_FROMDEVICE);
+				new_dma_addr = dma_map_page(&adapter->pdev->dev,
+							    new_page,
+							    0, PAGE_SIZE,
+							    PCI_DMA_FROMDEVICE);
 				if (dma_mapping_error(&adapter->pdev->dev,
 						      new_dma_addr)) {
 					put_page(new_page);
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index 4c58c83dc225..bdb8a6c0f8aa 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -69,10 +69,10 @@
 /*
  * Version numbers
  */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.4.4.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.4.5.0-k"
 
 /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01040400
+#define VMXNET3_DRIVER_VERSION_NUM      0x01040500
 
 #if defined(CONFIG_PCI_MSI)
 	/* RSS only makes sense if MSI-X is supported. */
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 4f9748457f5a..0a242b200df4 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -800,7 +800,7 @@ static struct rtable *vrf_get_rtable(const struct net_device *dev,
 }
 
 /* called under rcu_read_lock */
-static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
+static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
 {
 	struct fib_result res = { .tclassid = 0 };
 	struct net *net = dev_net(dev);
@@ -808,9 +808,10 @@ static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
 	u8 flags = fl4->flowi4_flags;
 	u8 scope = fl4->flowi4_scope;
 	u8 tos = RT_FL_TOS(fl4);
+	int rc;
 
 	if (unlikely(!fl4->daddr))
-		return;
+		return 0;
 
 	fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF;
 	fl4->flowi4_iif = LOOPBACK_IFINDEX;
@@ -818,7 +819,8 @@ static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
 	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
 			     RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
 
-	if (!fib_lookup(net, fl4, &res, 0)) {
+	rc = fib_lookup(net, fl4, &res, 0);
+	if (!rc) {
 		if (res.type == RTN_LOCAL)
 			fl4->saddr = res.fi->fib_prefsrc ? : fl4->daddr;
 		else
@@ -828,6 +830,8 @@ static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
 	fl4->flowi4_flags = flags;
 	fl4->flowi4_tos = orig_tos;
 	fl4->flowi4_scope = scope;
+
+	return rc;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index f131ba947dc6..c0ad9aaa16a7 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -5,6 +5,7 @@ config PCI_DRA7XX
 	bool "TI DRA7xx PCIe controller"
 	select PCIE_DW
 	depends on OF && HAS_IOMEM && TI_PIPE3
+	depends on BROKEN
 	help
 	 Enables support for the PCIe controller in the DRA7xx SoC.  There
 	 are two instances of PCIe controller in DRA7xx.  This controller can
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 3d22fc3e3c1a..4e08d1cd704d 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2885,10 +2885,13 @@ static int sd_revalidate_disk(struct gendisk *disk)
 
 	/*
 	 * Use the device's preferred I/O size for reads and writes
-	 * unless the reported value is unreasonably large (or garbage).
+	 * unless the reported value is unreasonably small, large, or
+	 * garbage.
 	 */
-	if (sdkp->opt_xfer_blocks && sdkp->opt_xfer_blocks <= dev_max &&
-	    sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS)
+	if (sdkp->opt_xfer_blocks &&
+	    sdkp->opt_xfer_blocks <= dev_max &&
+	    sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
+	    sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_CACHE_SIZE)
 		rw_max = q->limits.io_opt =
 			logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
 	else
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 4165e9ac9e36..5972ffe5719a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -493,6 +493,25 @@ static inline void bpf_jit_free(struct bpf_prog *fp)
 
 #define BPF_ANC		BIT(15)
 
+static inline bool bpf_needs_clear_a(const struct sock_filter *first)
+{
+	switch (first->code) {
+	case BPF_RET | BPF_K:
+	case BPF_LD | BPF_W | BPF_LEN:
+		return false;
+
+	case BPF_LD | BPF_W | BPF_ABS:
+	case BPF_LD | BPF_H | BPF_ABS:
+	case BPF_LD | BPF_B | BPF_ABS:
+		if (first->k == SKF_AD_OFF + SKF_AD_ALU_XOR_X)
+			return true;
+		return false;
+
+	default:
+		return true;
+	}
+}
+
 static inline u16 bpf_anc_helper(const struct sock_filter *ftest)
 {
 	BUG_ON(ftest->code & BPF_ANC);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index eae6548efbf0..60048c50404e 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -586,6 +586,7 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size);
 
 extern int skip_trace(unsigned long ip);
 extern void ftrace_module_init(struct module *mod);
+extern void ftrace_release_mod(struct module *mod);
 
 extern void ftrace_disable_daemon(void);
 extern void ftrace_enable_daemon(void);
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index c8723b62c4cd..bc742dac7d3a 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -25,7 +25,7 @@
 #define SNOR_MFR_MACRONIX	CFI_MFR_MACRONIX
 #define SNOR_MFR_SPANSION	CFI_MFR_AMD
 #define SNOR_MFR_SST		CFI_MFR_SST
-#define SNOR_MFR_WINBOND	0xef
+#define SNOR_MFR_WINBOND	0xef /* Also used by some Spansion */
 
 /*
  * Note on opcode nomenclature: some opcodes have a format like
diff --git a/include/linux/sched.h b/include/linux/sched.h
index edad7a43edea..fa39434e3fdd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1455,14 +1455,15 @@ struct task_struct {
 	/* Used for emulating ABI behavior of previous Linux versions */
 	unsigned int personality;
 
-	unsigned in_execve:1;	/* Tell the LSMs that the process is doing an
-				 * execve */
-	unsigned in_iowait:1;
-
-	/* Revert to default priority/policy when forking */
+	/* scheduler bits, serialized by scheduler locks */
 	unsigned sched_reset_on_fork:1;
 	unsigned sched_contributes_to_load:1;
 	unsigned sched_migrated:1;
+	unsigned :0; /* force alignment to the next boundary */
+
+	/* unserialized, strictly 'current' */
+	unsigned in_execve:1; /* bit to tell LSMs we're in execve */
+	unsigned in_iowait:1;
 #ifdef CONFIG_MEMCG
 	unsigned memcg_may_oom:1;
 #endif
@@ -2002,7 +2003,8 @@ static inline int pid_alive(const struct task_struct *p)
 }
 
 /**
- * is_global_init - check if a task structure is init
+ * is_global_init - check if a task structure is init. Since init
+ * is free to have sub-threads we need to check tgid.
  * @tsk: Task structure to be checked.
  *
  * Check if a task structure is the first user space task the kernel created.
@@ -2011,7 +2013,7 @@ static inline int pid_alive(const struct task_struct *p)
  */
 static inline int is_global_init(struct task_struct *tsk)
 {
-	return tsk->pid == 1;
+	return task_tgid_nr(tsk) == 1;
 }
 
 extern struct pid *cad_pid;
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 774d85b2d5d9..5689a0c749f7 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -29,7 +29,7 @@ struct l3mdev_ops {
 	/* IPv4 ops */
 	struct rtable *	(*l3mdev_get_rtable)(const struct net_device *dev,
 					     const struct flowi4 *fl4);
-	void		(*l3mdev_get_saddr)(struct net_device *dev,
+	int		(*l3mdev_get_saddr)(struct net_device *dev,
 					    struct flowi4 *fl4);
 
 	/* IPv6 ops */
@@ -112,10 +112,11 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
 	return rc;
 }
 
-static inline void l3mdev_get_saddr(struct net *net, int ifindex,
-				    struct flowi4 *fl4)
+static inline int l3mdev_get_saddr(struct net *net, int ifindex,
+				   struct flowi4 *fl4)
 {
 	struct net_device *dev;
+	int rc = 0;
 
 	if (ifindex) {
 
@@ -124,11 +125,13 @@ static inline void l3mdev_get_saddr(struct net *net, int ifindex,
 		dev = dev_get_by_index_rcu(net, ifindex);
 		if (dev && netif_is_l3_master(dev) &&
 		    dev->l3mdev_ops->l3mdev_get_saddr) {
-			dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4);
+			rc = dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4);
 		}
 
 		rcu_read_unlock();
 	}
+
+	return rc;
 }
 
 static inline struct dst_entry *l3mdev_get_rt6_dst(const struct net_device *dev,
@@ -200,9 +203,10 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
 	return false;
 }
 
-static inline void l3mdev_get_saddr(struct net *net, int ifindex,
-				    struct flowi4 *fl4)
+static inline int l3mdev_get_saddr(struct net *net, int ifindex,
+				   struct flowi4 *fl4)
 {
+	return 0;
 }
 
 static inline
diff --git a/include/net/route.h b/include/net/route.h
index ee81307863d5..a3b9ef74a389 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -283,7 +283,12 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
 			      sport, dport, sk);
 
 	if (!src && oif) {
-		l3mdev_get_saddr(net, oif, fl4);
+		int rc;
+
+		rc = l3mdev_get_saddr(net, oif, fl4);
+		if (rc < 0)
+			return ERR_PTR(rc);
+
 		src = fl4->saddr;
 	}
 	if (!dst || !src) {
diff --git a/include/sound/soc.h b/include/sound/soc.h
index a8b4b9c8b1d2..fb955e69a78e 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1655,7 +1655,7 @@ extern const struct dev_pm_ops snd_soc_pm_ops;
 /* Helper functions */
 static inline void snd_soc_dapm_mutex_lock(struct snd_soc_dapm_context *dapm)
 {
-	mutex_lock(&dapm->card->dapm_mutex);
+	mutex_lock_nested(&dapm->card->dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME);
 }
 
 static inline void snd_soc_dapm_mutex_unlock(struct snd_soc_dapm_context *dapm)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ef2d6ea10736..cfc227ccfceb 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3154,15 +3154,16 @@ static int event_enable_on_exec(struct perf_event *event,
  * Enable all of a task's events that have been marked enable-on-exec.
  * This expects task == current.
  */
-static void perf_event_enable_on_exec(struct perf_event_context *ctx)
+static void perf_event_enable_on_exec(int ctxn)
 {
-	struct perf_event_context *clone_ctx = NULL;
+	struct perf_event_context *ctx, *clone_ctx = NULL;
 	struct perf_event *event;
 	unsigned long flags;
 	int enabled = 0;
 	int ret;
 
 	local_irq_save(flags);
+	ctx = current->perf_event_ctxp[ctxn];
 	if (!ctx || !ctx->nr_events)
 		goto out;
 
@@ -3205,17 +3206,11 @@ out:
 
 void perf_event_exec(void)
 {
-	struct perf_event_context *ctx;
 	int ctxn;
 
 	rcu_read_lock();
-	for_each_task_context_nr(ctxn) {
-		ctx = current->perf_event_ctxp[ctxn];
-		if (!ctx)
-			continue;
-
-		perf_event_enable_on_exec(ctx);
-	}
+	for_each_task_context_nr(ctxn)
+		perf_event_enable_on_exec(ctxn);
 	rcu_read_unlock();
 }
 
@@ -6493,9 +6488,6 @@ struct swevent_htable {
 
 	/* Recursion avoidance in each contexts */
 	int				recursion[PERF_NR_CONTEXTS];
-
-	/* Keeps track of cpu being initialized/exited */
-	bool				online;
 };
 
 static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
@@ -6753,14 +6745,8 @@ static int perf_swevent_add(struct perf_event *event, int flags)
 	hwc->state = !(flags & PERF_EF_START);
 
 	head = find_swevent_head(swhash, event);
-	if (!head) {
-		/*
-		 * We can race with cpu hotplug code. Do not
-		 * WARN if the cpu just got unplugged.
-		 */
-		WARN_ON_ONCE(swhash->online);
+	if (WARN_ON_ONCE(!head))
 		return -EINVAL;
-	}
 
 	hlist_add_head_rcu(&event->hlist_entry, head);
 	perf_event_update_userpage(event);
@@ -6828,7 +6814,6 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
 	int err = 0;
 
 	mutex_lock(&swhash->hlist_mutex);
-
 	if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
 		struct swevent_hlist *hlist;
 
@@ -9291,7 +9276,6 @@ static void perf_event_init_cpu(int cpu)
 	struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
 	mutex_lock(&swhash->hlist_mutex);
-	swhash->online = true;
 	if (swhash->hlist_refcount > 0) {
 		struct swevent_hlist *hlist;
 
@@ -9333,14 +9317,7 @@ static void perf_event_exit_cpu_context(int cpu)
 
 static void perf_event_exit_cpu(int cpu)
 {
-	struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
-
 	perf_event_exit_cpu_context(cpu);
-
-	mutex_lock(&swhash->hlist_mutex);
-	swhash->online = false;
-	swevent_hlist_release(swhash);
-	mutex_unlock(&swhash->hlist_mutex);
 }
 #else
 static inline void perf_event_exit_cpu(int cpu) { }
diff --git a/kernel/fork.c b/kernel/fork.c
index fce002ee3ddf..1155eac61687 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -380,6 +380,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 #endif
 	tsk->splice_pipe = NULL;
 	tsk->task_frag.page = NULL;
+	tsk->wake_q.next = NULL;
 
 	account_kernel_stack(ti, 1);
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0eebaeef317b..6ead200370da 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1434,6 +1434,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 	if (!desc)
 		return NULL;
 
+	chip_bus_lock(desc);
 	raw_spin_lock_irqsave(&desc->lock, flags);
 
 	/*
@@ -1447,7 +1448,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 		if (!action) {
 			WARN(1, "Trying to free already-free IRQ %d\n", irq);
 			raw_spin_unlock_irqrestore(&desc->lock, flags);
-
+			chip_bus_sync_unlock(desc);
 			return NULL;
 		}
 
@@ -1475,6 +1476,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 #endif
 
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
+	chip_bus_sync_unlock(desc);
 
 	unregister_handler_proc(irq, action);
 
@@ -1553,9 +1555,7 @@ void free_irq(unsigned int irq, void *dev_id)
 		desc->affinity_notify = NULL;
 #endif
 
-	chip_bus_lock(desc);
 	kfree(__free_irq(irq, dev_id));
-	chip_bus_sync_unlock(desc);
 }
 EXPORT_SYMBOL(free_irq);
 
diff --git a/kernel/module.c b/kernel/module.c
index 8f051a106676..38c7bd5583ff 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3571,6 +3571,12 @@ static int load_module(struct load_info *info, const char __user *uargs,
 	synchronize_sched();
 	mutex_unlock(&module_mutex);
  free_module:
+	/*
+	 * Ftrace needs to clean up what it initialized.
+	 * This does nothing if ftrace_module_init() wasn't called,
+	 * but it must be called outside of module_mutex.
+	 */
+	ftrace_release_mod(mod);
 	/* Free lock-classes; relies on the preceding sync_rcu() */
 	lockdep_free_key_range(mod->module_core, mod->core_size);
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 90e26b11deaa..cfdc0e61066c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2689,7 +2689,7 @@ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 	int decayed, removed = 0;
 
 	if (atomic_long_read(&cfs_rq->removed_load_avg)) {
-		long r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
+		s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
 		sa->load_avg = max_t(long, sa->load_avg - r, 0);
 		sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
 		removed = 1;
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 1c2b28536feb..060df67dbdd1 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -273,6 +273,7 @@ static const char **find_next(void *v, loff_t *pos)
 	if (*pos < last_index + start_index)
 		return __start___tracepoint_str + (*pos - last_index);
 
+	start_index += last_index;
 	return find_next_mod_format(start_index, v, fmt, pos);
 }
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 4ebc17d948cb..c54fd2924f25 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1483,6 +1483,7 @@ static void __init start_shepherd_timer(void)
 		BUG();
 	cpumask_copy(cpu_stat_off, cpu_online_mask);
 
+	vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
 	schedule_delayed_work(&shepherd,
 		round_jiffies_relative(sysctl_stat_interval));
 }
@@ -1550,7 +1551,6 @@ static int __init setup_vmstat(void)
 
 	start_shepherd_timer();
 	cpu_notifier_register_done();
-	vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
 #endif
 #ifdef CONFIG_PROC_FS
 	proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 12045dea276c..8a7ada8bb947 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -142,7 +142,10 @@ static void br_stp_start(struct net_bridge *br)
 	char *envp[] = { NULL };
 	struct net_bridge_port *p;
 
-	r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
+	if (net_eq(dev_net(br->dev), &init_net))
+		r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
+	else
+		r = -ENOENT;
 
 	spin_lock_bh(&br->lock);
 
diff --git a/net/core/dst.c b/net/core/dst.c
index e6dc77252fe9..a1656e3b8d72 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -301,12 +301,13 @@ void dst_release(struct dst_entry *dst)
 {
 	if (dst) {
 		int newrefcnt;
+		unsigned short nocache = dst->flags & DST_NOCACHE;
 
 		newrefcnt = atomic_dec_return(&dst->__refcnt);
 		if (unlikely(newrefcnt < 0))
 			net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
 					     __func__, dst, newrefcnt);
-		if (!newrefcnt && unlikely(dst->flags & DST_NOCACHE))
+		if (!newrefcnt && unlikely(nocache))
 			call_rcu(&dst->rcu_head, dst_destroy_rcu);
 	}
 }
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 63e5be0abd86..bc35f1842512 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -601,8 +601,11 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 			    (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
 			   daddr, saddr, 0, 0);
 
-	if (!saddr && ipc.oif)
-		l3mdev_get_saddr(net, ipc.oif, &fl4);
+	if (!saddr && ipc.oif) {
+		err = l3mdev_get_saddr(net, ipc.oif, &fl4);
+		if (err < 0)
+			goto done;
+	}
 
 	if (!inet->hdrincl) {
 		rfv.msg = msg;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2d656eef7f8e..d4c51158470f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2478,6 +2478,9 @@ static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
 	int newly_acked_sacked = prior_unsacked -
 				 (tp->packets_out - tp->sacked_out);
 
+	if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
+		return;
+
 	tp->prr_delivered += newly_acked_sacked;
 	if (delta < 0) {
 		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0c7b0e61b917..c43890848641 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1025,8 +1025,11 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 				   flow_flags,
 				   faddr, saddr, dport, inet->inet_sport);
 
-		if (!saddr && ipc.oif)
-			l3mdev_get_saddr(net, ipc.oif, fl4);
+		if (!saddr && ipc.oif) {
+			err = l3mdev_get_saddr(net, ipc.oif, fl4);
+			if (err < 0)
+				goto out;
+		}
 
 		security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
 		rt = ip_route_output_flow(net, fl4, sk);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e82a1ad80aa5..16bc83b2842a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -658,8 +658,10 @@ static void qdisc_rcu_free(struct rcu_head *head)
 {
 	struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
 
-	if (qdisc_is_percpu_stats(qdisc))
+	if (qdisc_is_percpu_stats(qdisc)) {
 		free_percpu(qdisc->cpu_bstats);
+		free_percpu(qdisc->cpu_qstats);
+	}
 
 	kfree((char *) qdisc - qdisc->padded);
 }
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index a4631477cedf..ef05cd9403d4 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -953,32 +953,20 @@ fail:
 	return NULL;
 }
 
-static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
+static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode,
+		      struct path *res)
 {
-	struct dentry *dentry;
-	struct path path;
-	int err = 0;
-	/*
-	 * Get the parent directory, calculate the hash for last
-	 * component.
-	 */
-	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
-	err = PTR_ERR(dentry);
-	if (IS_ERR(dentry))
-		return err;
+	int err;
 
-	/*
-	 * All right, let's create it.
-	 */
-	err = security_path_mknod(&path, dentry, mode, 0);
+	err = security_path_mknod(path, dentry, mode, 0);
 	if (!err) {
-		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
+		err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
 		if (!err) {
-			res->mnt = mntget(path.mnt);
+			res->mnt = mntget(path->mnt);
 			res->dentry = dget(dentry);
 		}
 	}
-	done_path_create(&path, dentry);
+
 	return err;
 }
 
@@ -989,10 +977,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	struct unix_sock *u = unix_sk(sk);
 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 	char *sun_path = sunaddr->sun_path;
-	int err;
+	int err, name_err;
 	unsigned int hash;
 	struct unix_address *addr;
 	struct hlist_head *list;
+	struct path path;
+	struct dentry *dentry;
 
 	err = -EINVAL;
 	if (sunaddr->sun_family != AF_UNIX)
@@ -1008,14 +998,34 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		goto out;
 	addr_len = err;
 
+	name_err = 0;
+	dentry = NULL;
+	if (sun_path[0]) {
+		/* Get the parent directory, calculate the hash for last
+		 * component.
+		 */
+		dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+
+		if (IS_ERR(dentry)) {
+			/* delay report until after 'already bound' check */
+			name_err = PTR_ERR(dentry);
+			dentry = NULL;
+		}
+	}
+
 	err = mutex_lock_interruptible(&u->readlock);
 	if (err)
-		goto out;
+		goto out_path;
 
 	err = -EINVAL;
 	if (u->addr)
 		goto out_up;
 
+	if (name_err) {
+		err = name_err == -EEXIST ? -EADDRINUSE : name_err;
+		goto out_up;
+	}
+
 	err = -ENOMEM;
 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
 	if (!addr)
@@ -1026,11 +1036,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	addr->hash = hash ^ sk->sk_type;
 	atomic_set(&addr->refcnt, 1);
 
-	if (sun_path[0]) {
-		struct path path;
+	if (dentry) {
+		struct path u_path;
 		umode_t mode = S_IFSOCK |
 		       (SOCK_INODE(sock)->i_mode & ~current_umask());
-		err = unix_mknod(sun_path, mode, &path);
+		err = unix_mknod(dentry, &path, mode, &u_path);
 		if (err) {
 			if (err == -EEXIST)
 				err = -EADDRINUSE;
@@ -1038,9 +1048,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 			goto out_up;
 		}
 		addr->hash = UNIX_HASH_SIZE;
-		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1);
+		hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
 		spin_lock(&unix_table_lock);
-		u->path = path;
+		u->path = u_path;
 		list = &unix_socket_table[hash];
 	} else {
 		spin_lock(&unix_table_lock);
@@ -1063,6 +1073,10 @@ out_unlock:
 	spin_unlock(&unix_table_lock);
 out_up:
 	mutex_unlock(&u->readlock);
+out_path:
+	if (dentry)
+		done_path_create(&path, dentry);
+
 out:
 	return err;
 }
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index 301d70b0174f..e167592793a7 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -586,7 +586,7 @@ main(int argc, char *argv[])
 			do_file(file);
 			break;
 		case SJ_FAIL:    /* error in do_file or below */
-			sprintf("%s: failed\n", file);
+			fprintf(stderr, "%s: failed\n", file);
 			++n_error;
 			break;
 		case SJ_SUCCEED:    /* premature success */
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index fe96428aa403..3a89d82f8057 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -67,6 +67,10 @@ enum {
 	ALC_HEADSET_TYPE_OMTP,
 };
 
+enum {
+	ALC_KEY_MICMUTE_INDEX,
+};
+
 struct alc_customize_define {
 	unsigned int  sku_cfg;
 	unsigned char port_connectivity;
@@ -123,6 +127,7 @@ struct alc_spec {
 	unsigned int pll_coef_idx, pll_coef_bit;
 	unsigned int coef0;
 	struct input_dev *kb_dev;
+	u8 alc_mute_keycode_map[1];
 };
 
 /*
@@ -3462,12 +3467,43 @@ static void gpio2_mic_hotkey_event(struct hda_codec *codec,
 
 	/* GPIO2 just toggles on a keypress/keyrelease cycle. Therefore
 	   send both key on and key off event for every interrupt. */
-	input_report_key(spec->kb_dev, KEY_MICMUTE, 1);
+	input_report_key(spec->kb_dev, spec->alc_mute_keycode_map[ALC_KEY_MICMUTE_INDEX], 1);
 	input_sync(spec->kb_dev);
-	input_report_key(spec->kb_dev, KEY_MICMUTE, 0);
+	input_report_key(spec->kb_dev, spec->alc_mute_keycode_map[ALC_KEY_MICMUTE_INDEX], 0);
 	input_sync(spec->kb_dev);
 }
 
+static int alc_register_micmute_input_device(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+	int i;
+
+	spec->kb_dev = input_allocate_device();
+	if (!spec->kb_dev) {
+		codec_err(codec, "Out of memory (input_allocate_device)\n");
+		return -ENOMEM;
+	}
+
+	spec->alc_mute_keycode_map[ALC_KEY_MICMUTE_INDEX] = KEY_MICMUTE;
+
+	spec->kb_dev->name = "Microphone Mute Button";
+	spec->kb_dev->evbit[0] = BIT_MASK(EV_KEY);
+	spec->kb_dev->keycodesize = sizeof(spec->alc_mute_keycode_map[0]);
+	spec->kb_dev->keycodemax = ARRAY_SIZE(spec->alc_mute_keycode_map);
+	spec->kb_dev->keycode = spec->alc_mute_keycode_map;
+	for (i = 0; i < ARRAY_SIZE(spec->alc_mute_keycode_map); i++)
+		set_bit(spec->alc_mute_keycode_map[i], spec->kb_dev->keybit);
+
+	if (input_register_device(spec->kb_dev)) {
+		codec_err(codec, "input_register_device failed\n");
+		input_free_device(spec->kb_dev);
+		spec->kb_dev = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 static void alc280_fixup_hp_gpio2_mic_hotkey(struct hda_codec *codec,
 					     const struct hda_fixup *fix, int action)
 {
@@ -3485,20 +3521,8 @@ static void alc280_fixup_hp_gpio2_mic_hotkey(struct hda_codec *codec,
 	struct alc_spec *spec = codec->spec;
 
 	if (action == HDA_FIXUP_ACT_PRE_PROBE) {
-		spec->kb_dev = input_allocate_device();
-		if (!spec->kb_dev) {
-			codec_err(codec, "Out of memory (input_allocate_device)\n");
+		if (alc_register_micmute_input_device(codec) != 0)
 			return;
-		}
-		spec->kb_dev->name = "Microphone Mute Button";
-		spec->kb_dev->evbit[0] = BIT_MASK(EV_KEY);
-		spec->kb_dev->keybit[BIT_WORD(KEY_MICMUTE)] = BIT_MASK(KEY_MICMUTE);
-		if (input_register_device(spec->kb_dev)) {
-			codec_err(codec, "input_register_device failed\n");
-			input_free_device(spec->kb_dev);
-			spec->kb_dev = NULL;
-			return;
-		}
 
 		snd_hda_add_verbs(codec, gpio_init);
 		snd_hda_codec_write_cache(codec, codec->core.afg, 0,
@@ -3528,6 +3552,47 @@ static void alc280_fixup_hp_gpio2_mic_hotkey(struct hda_codec *codec,
 	}
 }
 
+static void alc233_fixup_lenovo_line2_mic_hotkey(struct hda_codec *codec,
+					     const struct hda_fixup *fix, int action)
+{
+	/* Line2 = mic mute hotkey
+	   GPIO2 = mic mute LED */
+	static const struct hda_verb gpio_init[] = {
+		{ 0x01, AC_VERB_SET_GPIO_MASK, 0x04 },
+		{ 0x01, AC_VERB_SET_GPIO_DIRECTION, 0x04 },
+		{}
+	};
+
+	struct alc_spec *spec = codec->spec;
+
+	if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+		if (alc_register_micmute_input_device(codec) != 0)
+			return;
+
+		snd_hda_add_verbs(codec, gpio_init);
+		snd_hda_jack_detect_enable_callback(codec, 0x1b,
+						    gpio2_mic_hotkey_event);
+
+		spec->gen.cap_sync_hook = alc_fixup_gpio_mic_mute_hook;
+		spec->gpio_led = 0;
+		spec->mute_led_polarity = 0;
+		spec->gpio_mic_led_mask = 0x04;
+		return;
+	}
+
+	if (!spec->kb_dev)
+		return;
+
+	switch (action) {
+	case HDA_FIXUP_ACT_PROBE:
+		spec->init_amp = ALC_INIT_DEFAULT;
+		break;
+	case HDA_FIXUP_ACT_FREE:
+		input_unregister_device(spec->kb_dev);
+		spec->kb_dev = NULL;
+	}
+}
+
 static void alc269_fixup_hp_line1_mic1_led(struct hda_codec *codec,
 				const struct hda_fixup *fix, int action)
 {
@@ -4628,6 +4693,7 @@ enum {
 	ALC275_FIXUP_DELL_XPS,
 	ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE,
 	ALC293_FIXUP_LENOVO_SPK_NOISE,
+	ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -5237,6 +5303,10 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chained = true,
 		.chain_id = ALC269_FIXUP_THINKPAD_ACPI
 	},
+	[ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc233_fixup_lenovo_line2_mic_hotkey,
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -5386,6 +5456,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE),
+	SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
 	SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
 	SND_PCI_QUIRK(0x17aa, 0x3978, "IdeaPad Y410P", ALC269_FIXUP_NO_SHUTUP),
 	SND_PCI_QUIRK(0x17aa, 0x5013, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c
index b3ea24d64c50..93b400800905 100644
--- a/sound/soc/codecs/arizona.c
+++ b/sound/soc/codecs/arizona.c
@@ -1537,7 +1537,7 @@ static int arizona_hw_params(struct snd_pcm_substream *substream,
 	bool reconfig;
 	unsigned int aif_tx_state, aif_rx_state;
 
-	if (params_rate(params) % 8000)
+	if (params_rate(params) % 4000)
 		rates = &arizona_44k1_bclk_rates[0];
 	else
 		rates = &arizona_48k_bclk_rates[0];
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index ef76940f9dcb..3e3c7f6be29d 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -1667,9 +1667,13 @@ static int rt5645_spk_event(struct snd_soc_dapm_widget *w,
 			RT5645_PWR_CLS_D_L,
 			RT5645_PWR_CLS_D | RT5645_PWR_CLS_D_R |
 			RT5645_PWR_CLS_D_L);
+		snd_soc_update_bits(codec, RT5645_GEN_CTRL3,
+			RT5645_DET_CLK_MASK, RT5645_DET_CLK_MODE1);
 		break;
 
 	case SND_SOC_DAPM_PRE_PMD:
+		snd_soc_update_bits(codec, RT5645_GEN_CTRL3,
+			RT5645_DET_CLK_MASK, RT5645_DET_CLK_DIS);
 		snd_soc_write(codec, RT5645_EQ_CTRL2, 0);
 		snd_soc_update_bits(codec, RT5645_PWR_DIG1,
 			RT5645_PWR_CLS_D | RT5645_PWR_CLS_D_R |
diff --git a/sound/soc/codecs/rt5645.h b/sound/soc/codecs/rt5645.h
index 093e46d559fb..205e0715c99a 100644
--- a/sound/soc/codecs/rt5645.h
+++ b/sound/soc/codecs/rt5645.h
@@ -2122,6 +2122,10 @@ enum {
 /* General Control3 (0xfc) */
 #define RT5645_JD_PSV_MODE			(0x1 << 12)
 #define RT5645_IRQ_CLK_GATE_CTRL		(0x1 << 11)
+#define RT5645_DET_CLK_MASK			(0x3 << 9)
+#define RT5645_DET_CLK_DIS			(0x0 << 9)
+#define RT5645_DET_CLK_MODE1			(0x1 << 9)
+#define RT5645_DET_CLK_MODE2			(0x2 << 9)
 #define RT5645_MICINDET_MANU			(0x1 << 7)
 #define RT5645_RING2_SLEEVE_GND			(0x1 << 5)
 
diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
index ffea427aeca8..ad4d0f82603e 100644
--- a/sound/soc/intel/skylake/skl-topology.c
+++ b/sound/soc/intel/skylake/skl-topology.c
@@ -1240,7 +1240,6 @@ int skl_tplg_init(struct snd_soc_platform *platform, struct hdac_ext_bus *ebus)
 	 */
 	ret = snd_soc_tplg_component_load(&platform->component,
 					&skl_tplg_ops, fw, 0);
-	release_firmware(fw);
 	if (ret < 0) {
 		dev_err(bus->dev, "tplg component load failed%d\n", ret);
 		return -EINVAL;
@@ -1249,5 +1248,7 @@ int skl_tplg_init(struct snd_soc_platform *platform, struct hdac_ext_bus *ebus)
 	skl->resource.max_mcps = SKL_MAX_MCPS;
 	skl->resource.max_mem = SKL_FW_MAX_MEM;
 
+	skl->tplg = fw;
+
 	return 0;
 }
diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c
index 5319529aedf7..caa69c4598a6 100644
--- a/sound/soc/intel/skylake/skl.c
+++ b/sound/soc/intel/skylake/skl.c
@@ -25,6 +25,7 @@
 #include <linux/pci.h>
 #include <linux/pm_runtime.h>
 #include <linux/platform_device.h>
+#include <linux/firmware.h>
 #include <sound/pcm.h>
 #include "skl.h"
 
@@ -520,6 +521,9 @@ static void skl_remove(struct pci_dev *pci)
 	struct hdac_ext_bus *ebus = pci_get_drvdata(pci);
 	struct skl *skl = ebus_to_skl(ebus);
 
+	if (skl->tplg)
+		release_firmware(skl->tplg);
+
 	if (pci_dev_run_wake(pci))
 		pm_runtime_get_noresume(&pci->dev);
 	pci_dev_put(pci);
diff --git a/sound/soc/intel/skylake/skl.h b/sound/soc/intel/skylake/skl.h
index dd2e79ae45a8..a0709e344d44 100644
--- a/sound/soc/intel/skylake/skl.h
+++ b/sound/soc/intel/skylake/skl.h
@@ -68,6 +68,8 @@ struct skl {
 	struct skl_dsp_resource resource;
 	struct list_head ppl_list;
 	struct list_head dapm_path_list;
+
+	const struct firmware *tplg;
 };
 
 #define skl_to_ebus(s)	(&(s)->ebus)
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 918b4de29de4..6419f57b0850 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -110,7 +110,7 @@ int cmd_buildid_list(int argc, const char **argv,
 	setup_pager();
 
 	if (show_kernel)
-		return sysfs__fprintf_build_id(stdout);
+		return !(sysfs__fprintf_build_id(stdout) > 0);
 
 	return perf_session__list_build_ids(force, with_hits);
 }
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index fa9eb92c9e24..81def6c3f24b 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -298,6 +298,9 @@ static bool hist_browser__toggle_fold(struct hist_browser *browser)
 	struct callchain_list *cl = container_of(ms, struct callchain_list, ms);
 	bool has_children;
 
+	if (!he || !ms)
+		return false;
+
 	if (ms == &he->ms)
 		has_children = hist_entry__toggle_fold(he);
 	else
@@ -928,6 +931,8 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
 	}
 
 	ui_browser__hists_init_top(browser);
+	hb->he_selection = NULL;
+	hb->selection = NULL;
 
 	for (nd = browser->top; nd; nd = rb_next(nd)) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
@@ -1033,6 +1038,9 @@ static void ui_browser__hists_seek(struct ui_browser *browser,
 	 * and stop when we printed enough lines to fill the screen.
 	 */
 do_offset:
+	if (!nd)
+		return;
+
 	if (offset > 0) {
 		do {
 			h = rb_entry(nd, struct hist_entry, rb_node);
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 217b5a60e2ab..6a7e273a514a 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -91,7 +91,7 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf)
 		bid += 2;
 	}
 
-	return raw - build_id;
+	return (bid - bf) + 1;
 }
 
 int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index e48d9da75707..6fc8cd753e1a 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -124,6 +124,10 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
 		.symbol = "dummy",
 		.alias  = "",
 	},
+	[PERF_COUNT_SW_BPF_OUTPUT] = {
+		.symbol = "bpf-output",
+		.alias  = "",
+	},
 };
 
 #define __PERF_EVENT_FIELD(config, name) \
@@ -1879,7 +1883,7 @@ restart:
 
 	for (i = 0; i < max; i++, syms++) {
 
-		if (event_glob != NULL &&
+		if (event_glob != NULL && syms->symbol != NULL &&
 		    !(strglobmatch(syms->symbol, event_glob) ||
 		      (syms->alias && strglobmatch(syms->alias, event_glob))))
 			continue;