Diffstat (limited to 'arch/powerpc/mm')
29 files changed, 708 insertions, 369 deletions
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index 12d92518e898..ea2b9af08a48 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c @@ -29,6 +29,7 @@ #include <asm/mmu.h> #include <asm/page.h> #include <asm/cacheflush.h> +#include <asm/code-patching.h> #include "mmu_decl.h" @@ -43,22 +44,13 @@ unsigned long tlb_47x_boltmap[1024/8]; static void ppc44x_update_tlb_hwater(void) { - extern unsigned int tlb_44x_patch_hwater_D[]; - extern unsigned int tlb_44x_patch_hwater_I[]; - /* The TLB miss handlers hard codes the watermark in a cmpli * instruction to improve performances rather than loading it * from the global variable. Thus, we patch the instructions * in the 2 TLB miss handlers when updating the value */ - tlb_44x_patch_hwater_D[0] = (tlb_44x_patch_hwater_D[0] & 0xffff0000) | - tlb_44x_hwater; - flush_icache_range((unsigned long)&tlb_44x_patch_hwater_D[0], - (unsigned long)&tlb_44x_patch_hwater_D[1]); - tlb_44x_patch_hwater_I[0] = (tlb_44x_patch_hwater_I[0] & 0xffff0000) | - tlb_44x_hwater; - flush_icache_range((unsigned long)&tlb_44x_patch_hwater_I[0], - (unsigned long)&tlb_44x_patch_hwater_I[1]); + modify_instruction_site(&patch__tlb_44x_hwater_D, 0xffff, tlb_44x_hwater); + modify_instruction_site(&patch__tlb_44x_hwater_I, 0xffff, tlb_44x_hwater); } /* diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index 01b7f5107c3a..bfa503cff351 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -100,11 +100,7 @@ static void __init mmu_mapin_immr(void) static void __init mmu_patch_cmp_limit(s32 *site, unsigned long mapped) { - unsigned int instr = *(unsigned int *)patch_site_addr(site); - - instr &= 0xffff0000; - instr |= (unsigned long)__va(mapped) >> 16; - patch_instruction_site(site, instr); + modify_instruction_site(site, 0xffff, (unsigned long)__va(mapped) >> 16); } unsigned long __init mmu_mapin_ram(unsigned long top) @@ -175,12 +171,12 @@ void set_context(unsigned long id, pgd_t *pgd) *(ptr + 1) = pgd; #endif - /* Register M_TW will contain base address of level 1 table minus the + /* Register M_TWB will contain base address of level 1 table minus the * lower part of the kernel PGDIR base address, so that all accesses to * level 1 table are done relative to lower part of kernel PGDIR base * address. 
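Both hunks above boil down to the same bit-field rewrite: clear the bits selected by a mask and OR in a new value. A stand-alone sketch of that operation, assuming a user-space setting (the real modify_instruction_site() additionally resolves the patch-site address and flushes the icache):

        #include <stdint.h>
        #include <stdio.h>

        /* Rewrite the bits of *insn selected by mask with val, as the
         * 44x/8xx hunks do with mask 0xffff (the 16-bit immediate field
         * of cmpli/addis-style instructions). */
        static void modify_insn(uint32_t *insn, uint32_t mask, uint32_t val)
        {
                *insn = (*insn & ~mask) | (val & mask);
        }

        int main(void)
        {
                uint32_t insn = 0x28090000;     /* cmplwi r9,0 (example encoding) */

                modify_insn(&insn, 0xffff, 0x1234);
                printf("patched: 0x%08x\n", (unsigned)insn);  /* 0x28091234 */
                return 0;
        }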
*/ - mtspr(SPRN_M_TW, __pa(pgd) - offset); + mtspr(SPRN_M_TWB, __pa(pgd) - offset); /* Update context */ mtspr(SPRN_M_CASID, id - 1); diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index ca96e7be4d0e..f965fc33a8b7 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -15,10 +15,13 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o -obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o +obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb.o \ + $(hash64-y) mmu_context_book3s64.o \ + pgtable-book3s64.o pgtable-frag.o +obj-$(CONFIG_PPC32) += pgtable-frag.o obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o -obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o -obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o +obj-$(CONFIG_PPC_BOOK3S_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o +obj-$(CONFIG_PPC_BOOK3S) += tlb_hash$(BITS).o ifdef CONFIG_PPC_BOOK3S_64 obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o @@ -47,7 +50,7 @@ ifdef CONFIG_PPC_PTDUMP obj-$(CONFIG_4xx) += dump_linuxpagetables-generic.o obj-$(CONFIG_PPC_8xx) += dump_linuxpagetables-8xx.o obj-$(CONFIG_PPC_BOOK3E_MMU) += dump_linuxpagetables-generic.o -obj-$(CONFIG_PPC_BOOK3S_32) += dump_linuxpagetables-generic.o +obj-$(CONFIG_PPC_BOOK3S_32) += dump_linuxpagetables-generic.o dump_bats.o dump_sr.o obj-$(CONFIG_PPC_BOOK3S_64) += dump_linuxpagetables-book3s64.o endif obj-$(CONFIG_PPC_HTDUMP) += dump_hashpagetable.o diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index b6e7b5952ab5..e955539686a4 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -29,7 +29,7 @@ #include <linux/string.h> #include <linux/types.h> #include <linux/highmem.h> -#include <linux/dma-mapping.h> +#include <linux/dma-direct.h> #include <linux/export.h> #include <asm/tlbflush.h> @@ -151,8 +151,8 @@ static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsi * Allocate DMA-coherent memory space and return both the kernel remapped * virtual and bus address for that space. */ -void * -__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) +void *__dma_nommu_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { struct page *page; struct ppc_vm_region *c; @@ -223,7 +223,7 @@ __dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t /* * Set the "dma handle" */ - *handle = page_to_phys(page); + *dma_handle = phys_to_dma(dev, page_to_phys(page)); do { SetPageReserved(page); @@ -249,12 +249,12 @@ __dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t no_page: return NULL; } -EXPORT_SYMBOL(__dma_alloc_coherent); /* * free a page as defined by the above mapping. */ -void __dma_free_coherent(size_t size, void *vaddr) +void __dma_nommu_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, unsigned long attrs) { struct ppc_vm_region *c; unsigned long flags, addr; @@ -309,7 +309,6 @@ void __dma_free_coherent(size_t size, void *vaddr) __func__, vaddr); dump_stack(); } -EXPORT_SYMBOL(__dma_free_coherent); /* * make an area consistent. 
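The allocator rename above also changes how the returned handle is computed: page_to_phys() yields a raw CPU physical address, while phys_to_dma() additionally applies any device-specific bus offset. A toy model of the difference, with the offset value assumed for illustration:

        #include <stdint.h>
        #include <stdio.h>

        typedef uint64_t phys_addr_t;
        typedef uint64_t dma_addr_t;

        /* toy device: the bus sees memory shifted down by bus_offset bytes */
        struct toy_dev { uint64_t bus_offset; };

        static dma_addr_t toy_phys_to_dma(struct toy_dev *dev, phys_addr_t paddr)
        {
                return paddr - dev->bus_offset;
        }

        int main(void)
        {
                struct toy_dev dev = { .bus_offset = 0x80000000ull }; /* assumed */
                phys_addr_t cpu = 0x90000000ull;

                printf("cpu 0x%llx -> bus 0x%llx\n",
                       (unsigned long long)cpu,
                       (unsigned long long)toy_phys_to_dma(&dev, cpu));
                return 0;
        }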
@@ -401,7 +400,7 @@ EXPORT_SYMBOL(__dma_sync_page); /* * Return the PFN for a given cpu virtual address returned by - * __dma_alloc_coherent. This is used by dma_mmap_coherent() + * __dma_nommu_alloc_coherent. This is used by dma_mmap_coherent() */ unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr) { diff --git a/arch/powerpc/mm/dump_bats.c b/arch/powerpc/mm/dump_bats.c new file mode 100644 index 000000000000..a0d23e96e841 --- /dev/null +++ b/arch/powerpc/mm/dump_bats.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright 2018, Christophe Leroy CS S.I. + * <christophe.leroy@c-s.fr> + * + * This dumps the content of BATS + */ + +#include <asm/debugfs.h> +#include <asm/pgtable.h> +#include <asm/cpu_has_feature.h> + +static char *pp_601(int k, int pp) +{ + if (pp == 0) + return k ? "NA" : "RWX"; + if (pp == 1) + return k ? "ROX" : "RWX"; + if (pp == 2) + return k ? "RWX" : "RWX"; + return k ? "ROX" : "ROX"; +} + +static void bat_show_601(struct seq_file *m, int idx, u32 lower, u32 upper) +{ + u32 blpi = upper & 0xfffe0000; + u32 k = (upper >> 2) & 3; + u32 pp = upper & 3; + phys_addr_t pbn = PHYS_BAT_ADDR(lower); + u32 bsm = lower & 0x3ff; + u32 size = (bsm + 1) << 17; + + seq_printf(m, "%d: ", idx); + if (!(lower & 0x40)) { + seq_puts(m, " -\n"); + return; + } + + seq_printf(m, "0x%08x-0x%08x ", blpi, blpi + size - 1); +#ifdef CONFIG_PHYS_64BIT + seq_printf(m, "0x%016llx ", pbn); +#else + seq_printf(m, "0x%08x ", pbn); +#endif + + seq_printf(m, "Kernel %s User %s", pp_601(k & 2, pp), pp_601(k & 1, pp)); + + if (lower & _PAGE_WRITETHRU) + seq_puts(m, "write through "); + if (lower & _PAGE_NO_CACHE) + seq_puts(m, "no cache "); + if (lower & _PAGE_COHERENT) + seq_puts(m, "coherent "); + seq_puts(m, "\n"); +} + +#define BAT_SHOW_601(_m, _n, _l, _u) bat_show_601(_m, _n, mfspr(_l), mfspr(_u)) + +static int bats_show_601(struct seq_file *m, void *v) +{ + seq_puts(m, "---[ Block Address Translation ]---\n"); + + BAT_SHOW_601(m, 0, SPRN_IBAT0L, SPRN_IBAT0U); + BAT_SHOW_601(m, 1, SPRN_IBAT1L, SPRN_IBAT1U); + BAT_SHOW_601(m, 2, SPRN_IBAT2L, SPRN_IBAT2U); + BAT_SHOW_601(m, 3, SPRN_IBAT3L, SPRN_IBAT3U); + + return 0; +} + +static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool is_d) +{ + u32 bepi = upper & 0xfffe0000; + u32 bl = (upper >> 2) & 0x7ff; + u32 k = upper & 3; + phys_addr_t brpn = PHYS_BAT_ADDR(lower); + u32 size = (bl + 1) << 17; + + seq_printf(m, "%d: ", idx); + if (k == 0) { + seq_puts(m, " -\n"); + return; + } + + seq_printf(m, "0x%08x-0x%08x ", bepi, bepi + size - 1); +#ifdef CONFIG_PHYS_64BIT + seq_printf(m, "0x%016llx ", brpn); +#else + seq_printf(m, "0x%08x ", brpn); +#endif + + if (k == 1) + seq_puts(m, "User "); + else if (k == 2) + seq_puts(m, "Kernel "); + else + seq_puts(m, "Kernel/User "); + + if (lower & BPP_RX) + seq_puts(m, is_d ? "RO " : "EXEC "); + else if (lower & BPP_RW) + seq_puts(m, is_d ? "RW " : "EXEC "); + else + seq_puts(m, is_d ? 
"NA " : "NX "); + + if (lower & _PAGE_WRITETHRU) + seq_puts(m, "write through "); + if (lower & _PAGE_NO_CACHE) + seq_puts(m, "no cache "); + if (lower & _PAGE_COHERENT) + seq_puts(m, "coherent "); + if (lower & _PAGE_GUARDED) + seq_puts(m, "guarded "); + seq_puts(m, "\n"); +} + +#define BAT_SHOW_603(_m, _n, _l, _u, _d) bat_show_603(_m, _n, mfspr(_l), mfspr(_u), _d) + +static int bats_show_603(struct seq_file *m, void *v) +{ + seq_puts(m, "---[ Instruction Block Address Translation ]---\n"); + + BAT_SHOW_603(m, 0, SPRN_IBAT0L, SPRN_IBAT0U, false); + BAT_SHOW_603(m, 1, SPRN_IBAT1L, SPRN_IBAT1U, false); + BAT_SHOW_603(m, 2, SPRN_IBAT2L, SPRN_IBAT2U, false); + BAT_SHOW_603(m, 3, SPRN_IBAT3L, SPRN_IBAT3U, false); + if (mmu_has_feature(MMU_FTR_USE_HIGH_BATS)) { + BAT_SHOW_603(m, 4, SPRN_IBAT4L, SPRN_IBAT4U, false); + BAT_SHOW_603(m, 5, SPRN_IBAT5L, SPRN_IBAT5U, false); + BAT_SHOW_603(m, 6, SPRN_IBAT6L, SPRN_IBAT6U, false); + BAT_SHOW_603(m, 7, SPRN_IBAT7L, SPRN_IBAT7U, false); + } + + seq_puts(m, "\n---[ Data Block Address Translation ]---\n"); + + BAT_SHOW_603(m, 0, SPRN_DBAT0L, SPRN_DBAT0U, true); + BAT_SHOW_603(m, 1, SPRN_DBAT1L, SPRN_DBAT1U, true); + BAT_SHOW_603(m, 2, SPRN_DBAT2L, SPRN_DBAT2U, true); + BAT_SHOW_603(m, 3, SPRN_DBAT3L, SPRN_DBAT3U, true); + if (mmu_has_feature(MMU_FTR_USE_HIGH_BATS)) { + BAT_SHOW_603(m, 4, SPRN_DBAT4L, SPRN_DBAT4U, true); + BAT_SHOW_603(m, 5, SPRN_DBAT5L, SPRN_DBAT5U, true); + BAT_SHOW_603(m, 6, SPRN_DBAT6L, SPRN_DBAT6U, true); + BAT_SHOW_603(m, 7, SPRN_DBAT7L, SPRN_DBAT7U, true); + } + + return 0; +} + +static int bats_open(struct inode *inode, struct file *file) +{ + if (cpu_has_feature(CPU_FTR_601)) + return single_open(file, bats_show_601, NULL); + + return single_open(file, bats_show_603, NULL); +} + +static const struct file_operations bats_fops = { + .open = bats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init bats_init(void) +{ + struct dentry *debugfs_file; + + debugfs_file = debugfs_create_file("block_address_translation", 0400, + powerpc_debugfs_root, NULL, &bats_fops); + return debugfs_file ? 0 : -ENOMEM; +} +device_initcall(bats_init); diff --git a/arch/powerpc/mm/dump_linuxpagetables-generic.c b/arch/powerpc/mm/dump_linuxpagetables-generic.c index 1e3829ec1348..3fe98a0974c6 100644 --- a/arch/powerpc/mm/dump_linuxpagetables-generic.c +++ b/arch/powerpc/mm/dump_linuxpagetables-generic.c @@ -21,13 +21,11 @@ static const struct flag_info flag_array[] = { .set = "rw", .clear = "r ", }, { -#ifndef CONFIG_PPC_BOOK3S_32 .mask = _PAGE_EXEC, .val = _PAGE_EXEC, .set = " X ", .clear = " ", }, { -#endif .mask = _PAGE_PRESENT, .val = _PAGE_PRESENT, .set = "present", diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c index 2b74f8adf4d0..6aa41669ac1a 100644 --- a/arch/powerpc/mm/dump_linuxpagetables.c +++ b/arch/powerpc/mm/dump_linuxpagetables.c @@ -19,6 +19,7 @@ #include <linux/hugetlb.h> #include <linux/io.h> #include <linux/mm.h> +#include <linux/highmem.h> #include <linux/sched.h> #include <linux/seq_file.h> #include <asm/fixmap.h> diff --git a/arch/powerpc/mm/dump_sr.c b/arch/powerpc/mm/dump_sr.c new file mode 100644 index 000000000000..501843664bb9 --- /dev/null +++ b/arch/powerpc/mm/dump_sr.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright 2018, Christophe Leroy CS S.I. 
+ * <christophe.leroy@c-s.fr> + * + * This dumps the content of Segment Registers + */ + +#include <asm/debugfs.h> + +static void seg_show(struct seq_file *m, int i) +{ + u32 val = mfsrin(i << 28); + + seq_printf(m, "0x%01x0000000-0x%01xfffffff ", i, i); + seq_printf(m, "Kern key %d ", (val >> 30) & 1); + seq_printf(m, "User key %d ", (val >> 29) & 1); + if (val & 0x80000000) { + seq_printf(m, "Device 0x%03x", (val >> 20) & 0x1ff); + seq_printf(m, "-0x%05x", val & 0xfffff); + } else { + if (val & 0x10000000) + seq_puts(m, "No Exec "); + seq_printf(m, "VSID 0x%06x", val & 0xffffff); + } + seq_puts(m, "\n"); +} + +static int sr_show(struct seq_file *m, void *v) +{ + int i; + + seq_puts(m, "---[ User Segments ]---\n"); + for (i = 0; i < TASK_SIZE >> 28; i++) + seg_show(m, i); + + seq_puts(m, "\n---[ Kernel Segments ]---\n"); + for (; i < 16; i++) + seg_show(m, i); + + return 0; +} + +static int sr_open(struct inode *inode, struct file *file) +{ + return single_open(file, sr_show, NULL); +} + +static const struct file_operations sr_fops = { + .open = sr_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init sr_init(void) +{ + struct dentry *debugfs_file; + + debugfs_file = debugfs_create_file("segment_registers", 0400, + powerpc_debugfs_root, NULL, &sr_fops); + return debugfs_file ? 0 : -ENOMEM; +} +device_initcall(sr_init); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 1697e903bbf2..887f11bcf330 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -226,7 +226,9 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, static bool bad_kernel_fault(bool is_exec, unsigned long error_code, unsigned long address) { - if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT))) { + /* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */ + if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT | + DSISR_PROTFAULT))) { printk_ratelimited(KERN_CRIT "kernel tried to execute" " exec-protected page (%lx) -" "exploit attempt? (uid: %d)\n", @@ -272,7 +274,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, return false; if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) && - access_ok(VERIFY_READ, nip, sizeof(*nip))) { + access_ok(nip, sizeof(*nip))) { unsigned int inst; int res; @@ -341,10 +343,21 @@ static inline void cmo_account_page_fault(void) static inline void cmo_account_page_fault(void) { } #endif /* CONFIG_PPC_SMLPAR */ -#ifdef CONFIG_PPC_STD_MMU -static void sanity_check_fault(bool is_write, unsigned long error_code) +#ifdef CONFIG_PPC_BOOK3S +static void sanity_check_fault(bool is_write, bool is_user, + unsigned long error_code, unsigned long address) { /* + * Userspace trying to access kernel address, we get PROTFAULT for that. + */ + if (is_user && address >= TASK_SIZE) { + pr_crit_ratelimited("%s[%d]: User access of kernel address (%lx) - exploit attempt? (uid: %d)\n", + current->comm, current->pid, address, + from_kuid(&init_user_ns, current_uid())); + return; + } + + /* * For hash translation mode, we should never get a * PROTFAULT. Any update to pte to reduce access will result in us * removing the hash page table entry, thus resulting in a DSISR_NOHPTE @@ -373,12 +386,15 @@ static void sanity_check_fault(bool is_write, unsigned long error_code) * For radix, we can get prot fault for autonuma case, because radix * page table will have them marked noaccess for user. 
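The fault.c hunk above adds a ratelimited report when a user-mode access targets a kernel address. A compressed model of that predicate, with TASK_SIZE assumed to be the common 32-bit user/kernel split for illustration:

        #include <stdbool.h>
        #include <stdio.h>

        #define TOY_TASK_SIZE 0xc0000000UL      /* assumed user/kernel split */

        /* Mirrors the new check: only user-mode faults on addresses at or
         * above TASK_SIZE are flagged as possible exploit attempts. */
        static bool user_hits_kernel_address(bool is_user, unsigned long address)
        {
                return is_user && address >= TOY_TASK_SIZE;
        }

        int main(void)
        {
                printf("%d\n", user_hits_kernel_address(true, 0xc0001000UL)); /* 1 */
                printf("%d\n", user_hits_kernel_address(true, 0x10000000UL)); /* 0 */
                return 0;
        }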
*/ - if (!radix_enabled() && !is_write) - WARN_ON_ONCE(error_code & DSISR_PROTFAULT); + if (radix_enabled() || is_write) + return; + + WARN_ON_ONCE(error_code & DSISR_PROTFAULT); } #else -static void sanity_check_fault(bool is_write, unsigned long error_code) { } -#endif /* CONFIG_PPC_STD_MMU */ +static void sanity_check_fault(bool is_write, bool is_user, + unsigned long error_code, unsigned long address) { } +#endif /* CONFIG_PPC_BOOK3S */ /* * Define the correct "is_write" bit in error_code based @@ -435,7 +451,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, } /* Additional sanity check(s) */ - sanity_check_fault(is_write, error_code); + sanity_check_fault(is_write, is_user, error_code, address); /* * The kernel should never take an execute fault nor should it @@ -636,21 +652,23 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) switch (TRAP(regs)) { case 0x300: case 0x380: - printk(KERN_ALERT "Unable to handle kernel paging request for " - "data at address 0x%08lx\n", regs->dar); + case 0xe00: + pr_alert("BUG: %s at 0x%08lx\n", + regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" : + "Unable to handle kernel data access", regs->dar); break; case 0x400: case 0x480: - printk(KERN_ALERT "Unable to handle kernel paging request for " - "instruction fetch\n"); + pr_alert("BUG: Unable to handle kernel instruction fetch%s", + regs->nip < PAGE_SIZE ? " (NULL pointer?)\n" : "\n"); break; case 0x600: - printk(KERN_ALERT "Unable to handle kernel paging request for " - "unaligned access at address 0x%08lx\n", regs->dar); + pr_alert("BUG: Unable to handle kernel unaligned access at 0x%08lx\n", + regs->dar); break; default: - printk(KERN_ALERT "Unable to handle kernel paging request for " - "unknown fault\n"); + pr_alert("BUG: Unable to handle unknown paging fault at 0x%08lx\n", + regs->dar); break; } printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n", diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S index 26acf6c8c20c..1e2df3e9f9ea 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/hash_low_32.S @@ -28,6 +28,7 @@ #include <asm/asm-offsets.h> #include <asm/export.h> #include <asm/feature-fixups.h> +#include <asm/code-patching-asm.h> #ifdef CONFIG_SMP .section .bss @@ -337,11 +338,13 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) rlwimi r5,r4,10,26,31 /* put in API (abbrev page index) */ SET_V(r5) /* set V (valid) bit */ + patch_site 0f, patch__hash_page_A0 + patch_site 1f, patch__hash_page_A1 + patch_site 2f, patch__hash_page_A2 /* Get the address of the primary PTE group in the hash table (r3) */ -_GLOBAL(hash_page_patch_A) - addis r0,r7,Hash_base@h /* base address of hash table */ - rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ - rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ +0: addis r0,r7,Hash_base@h /* base address of hash table */ +1: rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ +2: rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ xor r3,r3,r0 /* make primary hash */ li r0,8 /* PTEs/group */ @@ -366,10 +369,10 @@ _GLOBAL(hash_page_patch_A) bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ beq+ found_slot + patch_site 0f, patch__hash_page_B /* Search the secondary PTEG for a matching PTE */ ori r5,r5,PTE_H /* set H (secondary hash) bit */ -_GLOBAL(hash_page_patch_B) - xoris r4,r3,Hash_msk>>16 /* compute secondary hash */ +0: xoris r4,r3,Hash_msk>>16 /* compute secondary hash */ xori r4,r4,(-PTEG_SIZE & 0xffff) 
addi r4,r4,-HPTE_SIZE mtctr r0 @@ -393,10 +396,10 @@ _GLOBAL(hash_page_patch_B) addi r6,r6,1 stw r6,primary_pteg_full@l(r4) + patch_site 0f, patch__hash_page_C /* Search the secondary PTEG for an empty slot */ ori r5,r5,PTE_H /* set H (secondary hash) bit */ -_GLOBAL(hash_page_patch_C) - xoris r4,r3,Hash_msk>>16 /* compute secondary hash */ +0: xoris r4,r3,Hash_msk>>16 /* compute secondary hash */ xori r4,r4,(-PTEG_SIZE & 0xffff) addi r4,r4,-HPTE_SIZE mtctr r0 @@ -577,11 +580,13 @@ _GLOBAL(flush_hash_pages) stwcx. r8,0,r5 /* update the pte */ bne- 33b + patch_site 0f, patch__flush_hash_A0 + patch_site 1f, patch__flush_hash_A1 + patch_site 2f, patch__flush_hash_A2 /* Get the address of the primary PTE group in the hash table (r3) */ -_GLOBAL(flush_hash_patch_A) - addis r8,r7,Hash_base@h /* base address of hash table */ - rlwimi r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ - rlwinm r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ +0: addis r8,r7,Hash_base@h /* base address of hash table */ +1: rlwimi r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ +2: rlwinm r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ xor r8,r0,r8 /* make primary hash */ /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ @@ -593,11 +598,11 @@ _GLOBAL(flush_hash_patch_A) bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ beq+ 3f + patch_site 0f, patch__flush_hash_B /* Search the secondary PTEG for a matching PTE */ ori r11,r11,PTE_H /* set H (secondary hash) bit */ li r0,8 /* PTEs/group */ -_GLOBAL(flush_hash_patch_B) - xoris r12,r8,Hash_msk>>16 /* compute secondary hash */ +0: xoris r12,r8,Hash_msk>>16 /* compute secondary hash */ xori r12,r12,(-PTEG_SIZE & 0xffff) addi r12,r12,-HPTE_SIZE mtctr r0 diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 8cf035e68378..9e732bb2c84a 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -42,6 +42,8 @@ EXPORT_SYMBOL(HPAGE_SHIFT); #define hugepd_none(hpd) (hpd_val(hpd) == 0) +#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_t)) - __builtin_ffs(sizeof(void *))) + pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) { /* @@ -61,14 +63,17 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, int num_hugepd; if (pshift >= pdshift) { - cachep = hugepte_cache; + cachep = PGT_CACHE(PTE_T_ORDER); num_hugepd = 1 << (pshift - pdshift); + } else if (IS_ENABLED(CONFIG_PPC_8xx)) { + cachep = PGT_CACHE(PTE_INDEX_SIZE); + num_hugepd = 1; } else { cachep = PGT_CACHE(pdshift - pshift); num_hugepd = 1; } - new = kmem_cache_zalloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); + new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); @@ -264,7 +269,7 @@ static void hugepd_free_rcu_callback(struct rcu_head *head) unsigned int i; for (i = 0; i < batch->index; i++) - kmem_cache_free(hugepte_cache, batch->ptes[i]); + kmem_cache_free(PGT_CACHE(PTE_T_ORDER), batch->ptes[i]); free_page((unsigned long)batch); } @@ -277,7 +282,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) if (atomic_read(&tlb->mm->mm_users) < 2 || mm_is_thread_local(tlb->mm)) { - kmem_cache_free(hugepte_cache, hugepte); + kmem_cache_free(PGT_CACHE(PTE_T_ORDER), hugepte); put_cpu_var(hugepd_freelist_cur); return; } @@ -289,7 +294,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) (*batchp)->ptes[(*batchp)->index++] = hugepte; if ((*batchp)->index == 
HUGEPD_FREELIST_SIZE) { - call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback); + call_rcu(&(*batchp)->rcu, hugepd_free_rcu_callback); *batchp = NULL; } put_cpu_var(hugepd_freelist_cur); @@ -329,6 +334,9 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (shift >= pdshift) hugepd_free(tlb, hugepte); + else if (IS_ENABLED(CONFIG_PPC_8xx)) + pgtable_free_tlb(tlb, hugepte, + get_hugepd_cache_index(PTE_INDEX_SIZE)); else pgtable_free_tlb(tlb, hugepte, get_hugepd_cache_index(pdshift - shift)); @@ -652,7 +660,6 @@ static int __init hugepage_setup_sz(char *str) } __setup("hugepagesz=", hugepage_setup_sz); -struct kmem_cache *hugepte_cache; static int __init hugetlbpage_init(void) { int psize; @@ -699,24 +706,13 @@ static int __init hugetlbpage_init(void) * if we have pdshift and shift value same, we don't * use pgt cache for hugepd. */ - if (pdshift > shift) - pgtable_cache_add(pdshift - shift, NULL); + if (pdshift > shift && IS_ENABLED(CONFIG_PPC_8xx)) + pgtable_cache_add(PTE_INDEX_SIZE); + else if (pdshift > shift) + pgtable_cache_add(pdshift - shift); #if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) - else if (!hugepte_cache) { - /* - * Create a kmem cache for hugeptes. The bottom bits in - * the pte have size information encoded in them, so - * align them to allow this - */ - hugepte_cache = kmem_cache_create("hugepte-cache", - sizeof(pte_t), - HUGEPD_SHIFT_MASK + 1, - 0, NULL); - if (hugepte_cache == NULL) - panic("%s: Unable to create kmem cache " - "for hugeptes\n", __func__); - - } + else + pgtable_cache_add(PTE_T_ORDER); #endif } diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 2b656e67f2ea..1e6910eb70ed 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -25,22 +25,40 @@ #include <asm/pgalloc.h> #include <asm/pgtable.h> -static void pgd_ctor(void *addr) -{ - memset(addr, 0, PGD_TABLE_SIZE); +#define CTOR(shift) static void ctor_##shift(void *addr) \ +{ \ + memset(addr, 0, sizeof(void *) << (shift)); \ } -static void pud_ctor(void *addr) -{ - memset(addr, 0, PUD_TABLE_SIZE); -} +CTOR(0); CTOR(1); CTOR(2); CTOR(3); CTOR(4); CTOR(5); CTOR(6); CTOR(7); +CTOR(8); CTOR(9); CTOR(10); CTOR(11); CTOR(12); CTOR(13); CTOR(14); CTOR(15); -static void pmd_ctor(void *addr) +static inline void (*ctor(int shift))(void *) { - memset(addr, 0, PMD_TABLE_SIZE); + BUILD_BUG_ON(MAX_PGTABLE_INDEX_SIZE != 15); + + switch (shift) { + case 0: return ctor_0; + case 1: return ctor_1; + case 2: return ctor_2; + case 3: return ctor_3; + case 4: return ctor_4; + case 5: return ctor_5; + case 6: return ctor_6; + case 7: return ctor_7; + case 8: return ctor_8; + case 9: return ctor_9; + case 10: return ctor_10; + case 11: return ctor_11; + case 12: return ctor_12; + case 13: return ctor_13; + case 14: return ctor_14; + case 15: return ctor_15; + } + return NULL; } -struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; +struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE + 1]; EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */ /* @@ -50,7 +68,7 @@ EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */ * everything else. Caches created by this function are used for all * the higher level pagetables, and for hugepage pagetables. 
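The hugetlbpage.c hunk above replaces the dedicated hugepte kmem cache with PGT_CACHE(PTE_T_ORDER), where the order is derived from the size ratio of pte_t to a pointer. A quick check of that arithmetic, with example sizes assumed:

        #include <stdio.h>

        /* __builtin_ffs() returns the 1-based index of the least significant
         * set bit, so for power-of-two sizes the difference below is
         * log2(sizeof(pte_t) / sizeof(void *)). */
        int main(void)
        {
                unsigned int pte_size = 8;      /* assumed: 64-bit PTEs */
                unsigned int ptr_size = 4;      /* assumed: 32-bit pointers */
                int order = __builtin_ffs(pte_size) - __builtin_ffs(ptr_size);

                printf("PTE_T_ORDER = %d\n", order);    /* prints 1 */
                return 0;
        }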
*/ -void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) +void pgtable_cache_add(unsigned int shift) { char *name; unsigned long table_size = sizeof(void *) << shift; @@ -71,19 +89,19 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) * moment, gcc doesn't seem to recognize is_power_of_2 as a * constant expression, so so much for that. */ BUG_ON(!is_power_of_2(minalign)); - BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE)); + BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); if (PGT_CACHE(shift)) return; /* Already have a cache of this size */ align = max_t(unsigned long, align, minalign); name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift); - new = kmem_cache_create(name, table_size, align, 0, ctor); + new = kmem_cache_create(name, table_size, align, 0, ctor(shift)); if (!new) panic("Could not allocate pgtable cache for order %d", shift); kfree(name); - pgtable_cache[shift - 1] = new; + pgtable_cache[shift] = new; pr_debug("Allocated pgtable cache for order %d\n", shift); } @@ -91,15 +109,15 @@ EXPORT_SYMBOL_GPL(pgtable_cache_add); /* used by kvm_hv module */ void pgtable_cache_init(void) { - pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); + pgtable_cache_add(PGD_INDEX_SIZE); - if (PMD_CACHE_INDEX && !PGT_CACHE(PMD_CACHE_INDEX)) - pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor); + if (PMD_CACHE_INDEX) + pgtable_cache_add(PMD_CACHE_INDEX); /* * In all current configs, when the PUD index exists it's the * same size as either the pgd or pmd index except with THP enabled * on book3s 64 */ - if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX)) - pgtable_cache_add(PUD_CACHE_INDEX, pud_ctor); + if (PUD_CACHE_INDEX) + pgtable_cache_add(PUD_CACHE_INDEX); } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 7a9886f98b0c..a5091c034747 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -188,15 +188,20 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node); for (; start < end; start += page_size) { - void *p; + void *p = NULL; int rc; if (vmemmap_populated(start, page_size)) continue; + /* + * Allocate from the altmap first if we have one. This may + * fail due to alignment issues when using 16MB hugepages, so + * fall back to system memory if the altmap allocation fail. 
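The populate-side change above tries the altmap first and falls back to system memory when that allocation fails (for example due to alignment with 16MB pages). The matching free-side change, visible just below, only returns a pfn to the altmap when it actually falls inside the altmap's range; a toy model of that range test, with illustrative field values:

        #include <stdbool.h>
        #include <stdio.h>

        struct toy_altmap {     /* subset of struct vmem_altmap, assumed */
                unsigned long base_pfn, reserve, free, alloc, align;
        };

        /* Same bounds computation as the vmemmap_free() hunk:
         * [base_pfn, base_pfn + reserve + free + alloc + align) */
        static bool pfn_in_altmap(unsigned long pfn, const struct toy_altmap *a)
        {
                unsigned long end = a->base_pfn + a->reserve + a->free +
                                    a->alloc + a->align;

                return pfn >= a->base_pfn && pfn < end;
        }

        int main(void)
        {
                struct toy_altmap a = { .base_pfn = 0x1000, .free = 0x100 };

                printf("%d %d\n", pfn_in_altmap(0x1080, &a),    /* 1 */
                       pfn_in_altmap(0x2000, &a));              /* 0 */
                return 0;
        }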
+ */ if (altmap) p = altmap_alloc_block_buf(page_size, altmap); - else + if (!p) p = vmemmap_alloc_block_buf(page_size, node); if (!p) return -ENOMEM; @@ -255,8 +260,15 @@ void __ref vmemmap_free(unsigned long start, unsigned long end, { unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; unsigned long page_order = get_order(page_size); + unsigned long alt_start = ~0, alt_end = ~0; + unsigned long base_pfn; start = _ALIGN_DOWN(start, page_size); + if (altmap) { + alt_start = altmap->base_pfn; + alt_end = altmap->base_pfn + altmap->reserve + + altmap->free + altmap->alloc + altmap->align; + } pr_debug("vmemmap_free %lx...%lx\n", start, end); @@ -280,8 +292,9 @@ void __ref vmemmap_free(unsigned long start, unsigned long end, page = pfn_to_page(addr >> PAGE_SHIFT); section_base = pfn_to_page(vmemmap_section_start(start)); nr_pages = 1 << page_order; + base_pfn = PHYS_PFN(addr); - if (altmap) { + if (base_pfn >= alt_start && base_pfn < alt_end) { vmem_altmap_free(altmap, nr_pages); } else if (PageReserved(page)) { /* allocated from bootmem */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 0a64fffabee1..33cc6f676fa6 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -139,7 +139,8 @@ int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap * } #ifdef CONFIG_MEMORY_HOTREMOVE -int __meminit arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) +int __meminit arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -246,35 +247,19 @@ static int __init mark_nonram_nosave(void) } #endif -static bool zone_limits_final; - /* - * The memory zones past TOP_ZONE are managed by generic mm code. - * These should be set to zero since that's what every other - * architecture does. + * Zones usage: + * + * We setup ZONE_DMA to be 31-bits on all platforms and ZONE_NORMAL to be + * everything else. GFP_DMA32 page allocations automatically fall back to + * ZONE_DMA. + * + * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to + * inform the generic DMA mapping code. 32-bit only devices (if not handled + * by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get + * otherwise served by ZONE_DMA. */ -static unsigned long max_zone_pfns[MAX_NR_ZONES] = { - [0 ... TOP_ZONE ] = ~0UL, - [TOP_ZONE + 1 ... MAX_NR_ZONES - 1] = 0 -}; - -/* - * Restrict the specified zone and all more restrictive zones - * to be below the specified pfn. May not be called after - * paging_init(). 
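The mem.c hunk here drops the limit_zone_pfn() machinery in favour of filling max_zone_pfns[] directly, with ZONE_DMA capped at 31 bits. A stand-alone check of what that cap works out to, assuming 4K pages:

        #include <stdio.h>

        int main(void)
        {
                unsigned long page_shift = 12;                  /* assumed 4K pages */
                unsigned long dma_limit_pfn = 0x7fffffffUL >> page_shift;

                /* 0x7ffff pfns of 4K each: ZONE_DMA covers the low 2GB */
                printf("ZONE_DMA limit pfn: 0x%lx (%lu MB)\n",
                       dma_limit_pfn, (dma_limit_pfn << page_shift) >> 20);
                return 0;
        }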
- */ -void __init limit_zone_pfn(enum zone_type zone, unsigned long pfn_limit) -{ - int i; - - if (WARN_ON(zone_limits_final)) - return; - - for (i = zone; i >= 0; i--) { - if (max_zone_pfns[i] > pfn_limit) - max_zone_pfns[i] = pfn_limit; - } -} +static unsigned long max_zone_pfns[MAX_NR_ZONES]; /* * Find the least restrictive zone that is entirely below the @@ -324,11 +309,14 @@ void __init paging_init(void) printk(KERN_DEBUG "Memory hole size: %ldMB\n", (long int)((top_of_ram - total_ram) >> 20)); +#ifdef CONFIG_ZONE_DMA + max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 0x7fffffffUL >> PAGE_SHIFT); +#endif + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_HIGHMEM - limit_zone_pfn(ZONE_NORMAL, lowmem_end_addr >> PAGE_SHIFT); + max_zone_pfns[ZONE_HIGHMEM] = max_pfn; #endif - limit_zone_pfn(TOP_ZONE, top_of_ram >> PAGE_SHIFT); - zone_limits_final = true; + free_area_init_nodes(max_zone_pfns); mark_nonram_nosave(); @@ -503,7 +491,7 @@ EXPORT_SYMBOL(flush_icache_user_range); void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { -#ifdef CONFIG_PPC_STD_MMU +#ifdef CONFIG_PPC_BOOK3S /* * We don't need to worry about _PAGE_PRESENT here because we are * called with either mm->page_table_lock held or ptl lock held @@ -541,7 +529,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, } hash_preload(vma->vm_mm, address, is_exec, trap); -#endif /* CONFIG_PPC_STD_MMU */ +#endif /* CONFIG_PPC_BOOK3S */ #if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \ && defined(CONFIG_HUGETLB_PAGE) if (is_vm_hugetlb_page(vma)) diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index f84e14f23e50..bb52320b7369 100644 --- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -15,6 +15,7 @@ #include <linux/sched/mm.h> #include <asm/mmu_context.h> +#include <asm/pgalloc.h> #if defined(CONFIG_PPC32) static inline void switch_mm_pgdir(struct task_struct *tsk, @@ -97,3 +98,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, switch_mmu_context(prev, next, tsk); } +#ifdef CONFIG_PPC32 +void arch_exit_mmap(struct mm_struct *mm) +{ + void *frag = pte_frag_get(&mm->context); + + if (frag) + pte_frag_destroy(frag); +} +#endif diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 510f103d7813..f720c5cc0b5e 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -164,21 +164,6 @@ static void destroy_contexts(mm_context_t *ctx) } } -static void pte_frag_destroy(void *pte_frag) -{ - int count; - struct page *page; - - page = virt_to_page(pte_frag); - /* drop all the pending references */ - count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT; - /* We allow PTE_FRAG_NR fragments from a PTE page */ - if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) { - pgtable_page_dtor(page); - __free_page(page); - } -} - static void pmd_frag_destroy(void *pmd_frag) { int count; diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index 56c2234cc6ae..a712a650a8b6 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -36,6 +36,8 @@ struct mm_iommu_table_group_mem_t { u64 ua; /* userspace address */ u64 entries; /* number of entries in hpas[] */ u64 *hpas; /* vmalloc'ed */ +#define MM_IOMMU_TABLE_INVALID_HPA ((uint64_t)-1) + u64 dev_hpa; /* Device memory base address */ }; static long 
mm_iommu_adjust_locked_vm(struct mm_struct *mm, @@ -126,7 +128,8 @@ static int mm_iommu_move_page_from_cma(struct page *page) return 0; } -long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, +static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, + unsigned long entries, unsigned long dev_hpa, struct mm_iommu_table_group_mem_t **pmem) { struct mm_iommu_table_group_mem_t *mem; @@ -140,12 +143,6 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { - if ((mem->ua == ua) && (mem->entries == entries)) { - ++mem->used; - *pmem = mem; - goto unlock_exit; - } - /* Overlap? */ if ((mem->ua < (ua + (entries << PAGE_SHIFT))) && (ua < (mem->ua + @@ -156,11 +153,13 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, } - ret = mm_iommu_adjust_locked_vm(mm, entries, true); - if (ret) - goto unlock_exit; + if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) { + ret = mm_iommu_adjust_locked_vm(mm, entries, true); + if (ret) + goto unlock_exit; - locked_entries = entries; + locked_entries = entries; + } mem = kzalloc(sizeof(*mem), GFP_KERNEL); if (!mem) { @@ -168,6 +167,13 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, goto unlock_exit; } + if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) { + mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT)); + mem->dev_hpa = dev_hpa; + goto good_exit; + } + mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA; + /* * For a starting point for a maximum page size calculation * we use @ua and @entries natural alignment to allow IOMMU pages @@ -236,6 +242,7 @@ populate: mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; } +good_exit: atomic64_set(&mem->mapped, 1); mem->used = 1; mem->ua = ua; @@ -252,13 +259,31 @@ unlock_exit: return ret; } -EXPORT_SYMBOL_GPL(mm_iommu_get); + +long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries, + struct mm_iommu_table_group_mem_t **pmem) +{ + return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA, + pmem); +} +EXPORT_SYMBOL_GPL(mm_iommu_new); + +long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua, + unsigned long entries, unsigned long dev_hpa, + struct mm_iommu_table_group_mem_t **pmem) +{ + return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem); +} +EXPORT_SYMBOL_GPL(mm_iommu_newdev); static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem) { long i; struct page *page = NULL; + if (!mem->hpas) + return; + for (i = 0; i < mem->entries; ++i) { if (!mem->hpas[i]) continue; @@ -300,6 +325,7 @@ static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem) long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) { long ret = 0; + unsigned long entries, dev_hpa; mutex_lock(&mem_list_mutex); @@ -321,9 +347,12 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) } /* @mapped became 0 so now mappings are disabled, release the region */ + entries = mem->entries; + dev_hpa = mem->dev_hpa; mm_iommu_release(mem); - mm_iommu_adjust_locked_vm(mm, mem->entries, false); + if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) + mm_iommu_adjust_locked_vm(mm, entries, false); unlock_exit: mutex_unlock(&mem_list_mutex); @@ -368,27 +397,32 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm, return ret; } -struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, +struct mm_iommu_table_group_mem_t *mm_iommu_get(struct 
mm_struct *mm, unsigned long ua, unsigned long entries) { struct mm_iommu_table_group_mem_t *mem, *ret = NULL; + mutex_lock(&mem_list_mutex); + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if ((mem->ua == ua) && (mem->entries == entries)) { ret = mem; + ++mem->used; break; } } + mutex_unlock(&mem_list_mutex); + return ret; } -EXPORT_SYMBOL_GPL(mm_iommu_find); +EXPORT_SYMBOL_GPL(mm_iommu_get); long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, unsigned long ua, unsigned int pageshift, unsigned long *hpa) { const long entry = (ua - mem->ua) >> PAGE_SHIFT; - u64 *va = &mem->hpas[entry]; + u64 *va; if (entry >= mem->entries) return -EFAULT; @@ -396,6 +430,12 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, if (pageshift > mem->pageshift) return -EFAULT; + if (!mem->hpas) { + *hpa = mem->dev_hpa + (ua - mem->ua); + return 0; + } + + va = &mem->hpas[entry]; *hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK); return 0; @@ -406,7 +446,6 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, unsigned long ua, unsigned int pageshift, unsigned long *hpa) { const long entry = (ua - mem->ua) >> PAGE_SHIFT; - void *va = &mem->hpas[entry]; unsigned long *pa; if (entry >= mem->entries) @@ -415,7 +454,12 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, if (pageshift > mem->pageshift) return -EFAULT; - pa = (void *) vmalloc_to_phys(va); + if (!mem->hpas) { + *hpa = mem->dev_hpa + (ua - mem->ua); + return 0; + } + + pa = (void *) vmalloc_to_phys(&mem->hpas[entry]); if (!pa) return -EFAULT; @@ -435,6 +479,9 @@ extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua) if (!mem) return; + if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) + return; + entry = (ua - mem->ua) >> PAGE_SHIFT; va = &mem->hpas[entry]; @@ -445,6 +492,33 @@ extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua) *pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY; } +bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa, + unsigned int pageshift, unsigned long *size) +{ + struct mm_iommu_table_group_mem_t *mem; + unsigned long end; + + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { + if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) + continue; + + end = mem->dev_hpa + (mem->entries << PAGE_SHIFT); + if ((mem->dev_hpa <= hpa) && (hpa < end)) { + /* + * Since the IOMMU page size might be bigger than + * PAGE_SIZE, the amount of preregistered memory + * starting from @hpa might be smaller than 1<<pageshift + * and the caller needs to distinguish this situation. + */ + *size = min(1UL << pageshift, end - hpa); + return true; + } + } + + return false; +} +EXPORT_SYMBOL_GPL(mm_iommu_is_devmem); + long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem) { if (atomic64_inc_not_zero(&mem->mapped)) diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 2faca46ad720..22d71a58167f 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -372,7 +372,6 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm) { pr_hard("initing context for mm @%p\n", mm); -#ifdef CONFIG_PPC_MM_SLICES /* * We have MMU_NO_CONTEXT set to be ~0. Hence check * explicitly against context.id == 0. 
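The mmu_context_iommu.c changes above introduce a second kind of region: device memory described only by a base address (dev_hpa), with no hpas[] page array. Translation for such regions degenerates to a linear offset. A toy model of the two paths in mm_iommu_ua_to_hpa(), with a simplified region struct and 4K pages assumed:

        #include <stdint.h>
        #include <stdio.h>

        struct toy_region {             /* simplified table_group_mem_t, assumed */
                uint64_t ua, dev_hpa;
                uint64_t *hpas;         /* NULL for device memory */
        };

        static int toy_ua_to_hpa(struct toy_region *m, uint64_t ua, uint64_t *hpa)
        {
                uint64_t entry = (ua - m->ua) >> 12;    /* assumed 4K pages */

                if (!m->hpas) {                         /* device memory: linear */
                        *hpa = m->dev_hpa + (ua - m->ua);
                        return 0;
                }
                *hpa = m->hpas[entry] | (ua & 0xfffULL); /* pinned pages: table */
                return 0;
        }

        int main(void)
        {
                struct toy_region dev = { .ua = 0x10000, .dev_hpa = 0x200000000ULL };
                uint64_t hpa;

                toy_ua_to_hpa(&dev, 0x11234, &hpa);
                printf("hpa = 0x%llx\n", (unsigned long long)hpa); /* 0x200001234 */
                return 0;
        }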
This ensures that we properly @@ -382,9 +381,9 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm) */ if (mm->context.id == 0) slice_init_new_context_exec(mm); -#endif mm->context.id = MMU_NO_CONTEXT; mm->context.active = 0; + pte_frag_set(&mm->context, NULL); return 0; } @@ -487,4 +486,3 @@ void __init mmu_context_init(void) next_context = FIRST_CONTEXT; nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1; } - diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 8574fbbc45e0..c4a717da65eb 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -155,7 +155,7 @@ struct tlbcam { }; #endif -#if defined(CONFIG_6xx) || defined(CONFIG_FSL_BOOKE) || defined(CONFIG_PPC_8xx) +#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_FSL_BOOKE) || defined(CONFIG_PPC_8xx) /* 6xx have BATS */ /* FSL_BOOKE have TLBCAM */ /* 8xx have LTLB */ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 3a048e98a132..87f0dd004295 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1178,7 +1178,7 @@ static long vphn_get_associativity(unsigned long cpu, switch (rc) { case H_FUNCTION: - printk(KERN_INFO + printk_once(KERN_INFO "VPHN is not supported. Disabling polling...\n"); stop_topology_update(); break; @@ -1475,7 +1475,7 @@ static int dt_update_callback(struct notifier_block *nb, switch (action) { case OF_RECONFIG_UPDATE_PROPERTY: - if (!of_prop_cmp(update->dn->type, "cpu") && + if (of_node_is_type(update->dn, "cpu") && !of_prop_cmp(update->prop->name, "ibm,associativity")) { u32 core_id; of_property_read_u32(update->dn, "reg", &core_id); diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index 9f93c9f985c5..f3c31f5e1026 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -244,6 +244,9 @@ static pmd_t *get_pmd_from_cache(struct mm_struct *mm) { void *pmd_frag, *ret; + if (PMD_FRAG_NR == 1) + return NULL; + spin_lock(&mm->page_table_lock); ret = mm->context.pmd_frag; if (ret) { @@ -322,91 +325,6 @@ void pmd_fragment_free(unsigned long *pmd) } } -static pte_t *get_pte_from_cache(struct mm_struct *mm) -{ - void *pte_frag, *ret; - - spin_lock(&mm->page_table_lock); - ret = mm->context.pte_frag; - if (ret) { - pte_frag = ret + PTE_FRAG_SIZE; - /* - * If we have taken up all the fragments mark PTE page NULL - */ - if (((unsigned long)pte_frag & ~PAGE_MASK) == 0) - pte_frag = NULL; - mm->context.pte_frag = pte_frag; - } - spin_unlock(&mm->page_table_lock); - return (pte_t *)ret; -} - -static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel) -{ - void *ret = NULL; - struct page *page; - - if (!kernel) { - page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT); - if (!page) - return NULL; - if (!pgtable_page_ctor(page)) { - __free_page(page); - return NULL; - } - } else { - page = alloc_page(PGALLOC_GFP); - if (!page) - return NULL; - } - - atomic_set(&page->pt_frag_refcount, 1); - - ret = page_address(page); - /* - * if we support only one fragment just return the - * allocated page. - */ - if (PTE_FRAG_NR == 1) - return ret; - spin_lock(&mm->page_table_lock); - /* - * If we find pgtable_page set, we return - * the allocated page with single fragement - * count. 
- */ - if (likely(!mm->context.pte_frag)) { - atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR); - mm->context.pte_frag = ret + PTE_FRAG_SIZE; - } - spin_unlock(&mm->page_table_lock); - - return (pte_t *)ret; -} - -pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel) -{ - pte_t *pte; - - pte = get_pte_from_cache(mm); - if (pte) - return pte; - - return __alloc_for_ptecache(mm, kernel); -} - -void pte_fragment_free(unsigned long *table, int kernel) -{ - struct page *page = virt_to_page(table); - - BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0); - if (atomic_dec_and_test(&page->pt_frag_refcount)) { - if (!kernel) - pgtable_page_dtor(page); - __free_page(page); - } -} - static inline void pgtable_free(void *table, int index) { switch (index) { diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c new file mode 100644 index 000000000000..a7b05214760c --- /dev/null +++ b/arch/powerpc/mm/pgtable-frag.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Handling Page Tables through page fragments + * + */ + +#include <linux/kernel.h> +#include <linux/gfp.h> +#include <linux/mm.h> +#include <linux/percpu.h> +#include <linux/hardirq.h> +#include <linux/hugetlb.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> +#include <asm/tlb.h> + +void pte_frag_destroy(void *pte_frag) +{ + int count; + struct page *page; + + page = virt_to_page(pte_frag); + /* drop all the pending references */ + count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT; + /* We allow PTE_FRAG_NR fragments from a PTE page */ + if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) { + pgtable_page_dtor(page); + __free_page(page); + } +} + +static pte_t *get_pte_from_cache(struct mm_struct *mm) +{ + void *pte_frag, *ret; + + if (PTE_FRAG_NR == 1) + return NULL; + + spin_lock(&mm->page_table_lock); + ret = pte_frag_get(&mm->context); + if (ret) { + pte_frag = ret + PTE_FRAG_SIZE; + /* + * If we have taken up all the fragments mark PTE page NULL + */ + if (((unsigned long)pte_frag & ~PAGE_MASK) == 0) + pte_frag = NULL; + pte_frag_set(&mm->context, pte_frag); + } + spin_unlock(&mm->page_table_lock); + return (pte_t *)ret; +} + +static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel) +{ + void *ret = NULL; + struct page *page; + + if (!kernel) { + page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT); + if (!page) + return NULL; + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } + } else { + page = alloc_page(PGALLOC_GFP); + if (!page) + return NULL; + } + + atomic_set(&page->pt_frag_refcount, 1); + + ret = page_address(page); + /* + * if we support only one fragment just return the + * allocated page. + */ + if (PTE_FRAG_NR == 1) + return ret; + spin_lock(&mm->page_table_lock); + /* + * If we find pgtable_page set, we return + * the allocated page with single fragement + * count. 
+ */ + if (likely(!pte_frag_get(&mm->context))) { + atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR); + pte_frag_set(&mm->context, ret + PTE_FRAG_SIZE); + } + spin_unlock(&mm->page_table_lock); + + return (pte_t *)ret; +} + +pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel) +{ + pte_t *pte; + + pte = get_pte_from_cache(mm); + if (pte) + return pte; + + return __alloc_for_ptecache(mm, kernel); +} + +void pte_fragment_free(unsigned long *table, int kernel) +{ + struct page *page = virt_to_page(table); + + BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0); + if (atomic_dec_and_test(&page->pt_frag_refcount)) { + if (!kernel) + pgtable_page_dtor(page); + __free_page(page); + } +} diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 010e1c616cb2..d3d61d29b4f1 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -74,7 +74,7 @@ static struct page *maybe_pte_to_page(pte_t pte) * support falls into the same category. */ -static pte_t set_pte_filter(pte_t pte) +static pte_t set_pte_filter_hash(pte_t pte) { if (radix_enabled()) return pte; @@ -93,14 +93,12 @@ static pte_t set_pte_filter(pte_t pte) return pte; } -static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, - int dirty) -{ - return pte; -} - #else /* CONFIG_PPC_BOOK3S */ +static pte_t set_pte_filter_hash(pte_t pte) { return pte; } + +#endif /* CONFIG_PPC_BOOK3S */ + /* Embedded type MMU with HW exec support. This is a bit more complicated * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so * instead we "filter out" the exec permission for non clean pages. @@ -109,6 +107,9 @@ static pte_t set_pte_filter(pte_t pte) { struct page *pg; + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) + return set_pte_filter_hash(pte); + /* No exec permission in the first place, move on */ if (!pte_exec(pte) || !pte_looks_normal(pte)) return pte; @@ -138,6 +139,9 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, { struct page *pg; + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) + return pte; + /* So here, we only care about exec faults, as we use them * to recover lost _PAGE_EXEC and perform I$/D$ coherency * if necessary. Also if _PAGE_EXEC is already set, same deal, @@ -172,8 +176,6 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, return pte_mkexec(pte); } -#endif /* CONFIG_PPC_BOOK3S */ - /* * set_pte stores a linux PTE into the linux page table. 
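pgtable-frag.c above shares one page between PTE_FRAG_NR page tables and tracks outstanding fragments with a per-page refcount. A compressed user-space model of the fragment carving, with page and fragment sizes assumed:

        #include <stdint.h>
        #include <stdio.h>
        #include <stdlib.h>

        #define TOY_PAGE_SIZE  4096
        #define TOY_FRAG_SIZE  1024                     /* assumed: 4 frags/page */
        #define TOY_FRAG_NR    (TOY_PAGE_SIZE / TOY_FRAG_SIZE)

        static void *cur_frag;          /* models the per-mm cached pte_frag */
        static int refcount;            /* models page->pt_frag_refcount */

        static void *frag_alloc(void)
        {
                void *ret = cur_frag;

                if (ret) {              /* carve from the cached page */
                        void *next = (char *)ret + TOY_FRAG_SIZE;

                        /* page exhausted once we wrap to the next page boundary */
                        cur_frag = ((uintptr_t)next & (TOY_PAGE_SIZE - 1)) == 0 ?
                                   NULL : next;
                        return ret;
                }
                ret = aligned_alloc(TOY_PAGE_SIZE, TOY_PAGE_SIZE);
                refcount = TOY_FRAG_NR; /* all fragments accounted up front */
                cur_frag = (char *)ret + TOY_FRAG_SIZE;
                return ret;
        }

        int main(void)
        {
                void *a = frag_alloc(), *b = frag_alloc();

                printf("%p %p refcount=%d\n", a, b, refcount);
                return 0;
        }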
*/ @@ -221,9 +223,9 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, } #ifdef CONFIG_HUGETLB_PAGE -extern int huge_ptep_set_access_flags(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep, - pte_t pte, int dirty) +int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) { #ifdef HUGETLB_NEED_PRELOAD /* diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index bda3c6f1bd32..ded71126ce4c 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -43,34 +43,17 @@ EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ extern char etext[], _stext[], _sinittext[], _einittext[]; -__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm) { - pte_t *pte; + if (!slab_is_available()) + return memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE); - if (slab_is_available()) { - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); - } else { - pte = __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE)); - if (pte) - clear_page(pte); - } - return pte; + return (pte_t *)pte_fragment_alloc(mm, 1); } -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) +pgtable_t pte_alloc_one(struct mm_struct *mm) { - struct page *ptepage; - - gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT; - - ptepage = alloc_pages(flags, 0); - if (!ptepage) - return NULL; - if (!pgtable_page_ctor(ptepage)) { - __free_page(ptepage); - return NULL; - } - return ptepage; + return (pgtable_t)pte_fragment_alloc(mm, 0); } void __iomem * @@ -160,7 +143,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call * Don't allow anybody to remap normal RAM that we're using. * mem_init() sets high_memory so only do the check after that. */ - if (slab_is_available() && (p < virt_to_phys(high_memory)) && + if (slab_is_available() && p <= virt_to_phys(high_memory - 1) && page_is_ram(__phys_to_pfn(p))) { printk("__ioremap(): phys addr 0x%llx is RAM lr %ps\n", (unsigned long long)p, __builtin_return_address(0)); @@ -260,7 +243,7 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top) ktext = ((char *)v >= _stext && (char *)v < etext) || ((char *)v >= _sinittext && (char *)v < _einittext); map_kernel_page(v, p, ktext ? 
PAGE_KERNEL_TEXT : PAGE_KERNEL); -#ifdef CONFIG_PPC_STD_MMU_32 +#ifdef CONFIG_PPC_BOOK3S_32 if (ktext) hash_preload(&init_mm, v, false, 0x300); #endif diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index b271b283c785..587807763737 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -6,20 +6,21 @@ */ #include <asm/mman.h> +#include <asm/mmu_context.h> #include <asm/setup.h> #include <linux/pkeys.h> #include <linux/of_device.h> DEFINE_STATIC_KEY_TRUE(pkey_disabled); -bool pkey_execute_disable_supported; int pkeys_total; /* Total pkeys as per device tree */ -bool pkeys_devtree_defined; /* pkey property exported by device tree */ u32 initial_allocation_mask; /* Bits set for the initially allocated keys */ u32 reserved_allocation_mask; /* Bits set for reserved keys */ -u64 pkey_amr_mask; /* Bits in AMR not to be touched */ -u64 pkey_iamr_mask; /* Bits in AMR not to be touched */ -u64 pkey_uamor_mask; /* Bits in UMOR not to be touched */ -int execute_only_key = 2; +static bool pkey_execute_disable_supported; +static bool pkeys_devtree_defined; /* property exported by device tree */ +static u64 pkey_amr_mask; /* Bits in AMR not to be touched */ +static u64 pkey_iamr_mask; /* Bits in AMR not to be touched */ +static u64 pkey_uamor_mask; /* Bits in UMOR not to be touched */ +static int execute_only_key = 2; #define AMR_BITS_PER_PKEY 2 #define AMR_RD_BIT 0x1UL @@ -57,7 +58,7 @@ static inline bool pkey_mmu_enabled(void) return cpu_has_feature(CPU_FTR_PKEY); } -int pkey_initialize(void) +static int pkey_initialize(void) { int os_reserved, i; @@ -414,3 +415,13 @@ bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, return pkey_access_permitted(vma_pkey(vma), write, execute); } + +void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm) +{ + if (static_branch_likely(&pkey_disabled)) + return; + + /* Duplicate the oldmm pkey state in mm: */ + mm_pkey_allocation_map(mm) = mm_pkey_allocation_map(oldmm); + mm->context.execute_only_pkey = oldmm->context.execute_only_pkey; +} diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index f6f575bae3bc..3f4193201ee7 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -31,6 +31,7 @@ #include <asm/prom.h> #include <asm/mmu.h> #include <asm/machdep.h> +#include <asm/code-patching.h> #include "mmu_decl.h" @@ -52,7 +53,7 @@ struct batrange { /* stores address ranges mapped by BATs */ phys_addr_t v_block_mapped(unsigned long va) { int b; - for (b = 0; b < 4; ++b) + for (b = 0; b < ARRAY_SIZE(bat_addrs); ++b) if (va >= bat_addrs[b].start && va < bat_addrs[b].limit) return bat_addrs[b].phys + (va - bat_addrs[b].start); return 0; @@ -64,7 +65,7 @@ phys_addr_t v_block_mapped(unsigned long va) unsigned long p_block_mapped(phys_addr_t pa) { int b; - for (b = 0; b < 4; ++b) + for (b = 0; b < ARRAY_SIZE(bat_addrs); ++b) if (pa >= bat_addrs[b].phys && pa < (bat_addrs[b].limit-bat_addrs[b].start) +bat_addrs[b].phys) @@ -182,22 +183,8 @@ void __init MMU_init_hw(void) unsigned int hmask, mb, mb2; unsigned int n_hpteg, lg_n_hpteg; - extern unsigned int hash_page_patch_A[]; - extern unsigned int hash_page_patch_B[], hash_page_patch_C[]; - extern unsigned int hash_page[]; - extern unsigned int flush_hash_patch_A[], flush_hash_patch_B[]; - - if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) { - /* - * Put a blr (procedure return) instruction at the - * start of hash_page, since we can still get DSI - * exceptions on a 603. 
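The hash_low_32.S hunks earlier replace the exported _GLOBAL labels with patch_site annotations, and the MMU_init_hw() hunk below patches those sites with modify_instruction_site(). Besides 16-bit immediates, it also patches the 5-bit mask-begin field of rlwimi/rlwinm (mask 0x7c0, value mb << 6). A quick check that the shift and mask line up, with example values assumed:

        #include <stdio.h>

        int main(void)
        {
                unsigned int insn = 0x50600000; /* rlwimi r0,r3,0,0,0 (example) */
                unsigned int mb = 10;           /* example mask-begin value */

                insn = (insn & ~0x7c0u) | (mb << 6);
                printf("MB field now %u (insn 0x%08x)\n",
                       (insn >> 6) & 0x1f, insn);
                return 0;
        }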
- */ - hash_page[0] = 0x4e800020; - flush_icache_range((unsigned long) &hash_page[0], - (unsigned long) &hash_page[1]); + if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) return; - } if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105); @@ -244,31 +231,19 @@ void __init MMU_init_hw(void) if (lg_n_hpteg > 16) mb2 = 16 - LG_HPTEG_SIZE; - hash_page_patch_A[0] = (hash_page_patch_A[0] & ~0xffff) - | ((unsigned int)(Hash) >> 16); - hash_page_patch_A[1] = (hash_page_patch_A[1] & ~0x7c0) | (mb << 6); - hash_page_patch_A[2] = (hash_page_patch_A[2] & ~0x7c0) | (mb2 << 6); - hash_page_patch_B[0] = (hash_page_patch_B[0] & ~0xffff) | hmask; - hash_page_patch_C[0] = (hash_page_patch_C[0] & ~0xffff) | hmask; - - /* - * Ensure that the locations we've patched have been written - * out from the data cache and invalidated in the instruction - * cache, on those machines with split caches. - */ - flush_icache_range((unsigned long) &hash_page_patch_A[0], - (unsigned long) &hash_page_patch_C[1]); + modify_instruction_site(&patch__hash_page_A0, 0xffff, (unsigned int)Hash >> 16); + modify_instruction_site(&patch__hash_page_A1, 0x7c0, mb << 6); + modify_instruction_site(&patch__hash_page_A2, 0x7c0, mb2 << 6); + modify_instruction_site(&patch__hash_page_B, 0xffff, hmask); + modify_instruction_site(&patch__hash_page_C, 0xffff, hmask); /* * Patch up the instructions in hashtable.S:flush_hash_page */ - flush_hash_patch_A[0] = (flush_hash_patch_A[0] & ~0xffff) - | ((unsigned int)(Hash) >> 16); - flush_hash_patch_A[1] = (flush_hash_patch_A[1] & ~0x7c0) | (mb << 6); - flush_hash_patch_A[2] = (flush_hash_patch_A[2] & ~0x7c0) | (mb2 << 6); - flush_hash_patch_B[0] = (flush_hash_patch_B[0] & ~0xffff) | hmask; - flush_icache_range((unsigned long) &flush_hash_patch_A[0], - (unsigned long) &flush_hash_patch_B[1]); + modify_instruction_site(&patch__flush_hash_A0, 0xffff, (unsigned int)Hash >> 16); + modify_instruction_site(&patch__flush_hash_A1, 0x7c0, mb << 6); + modify_instruction_site(&patch__flush_hash_A2, 0x7c0, mb2 << 6); + modify_instruction_site(&patch__flush_hash_B, 0xffff, hmask); if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205); } diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index c3fdf2969d9f..bc3914d54e26 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -19,6 +19,7 @@ #include <asm/mmu.h> #include <asm/mmu_context.h> #include <asm/paca.h> +#include <asm/ppc-opcode.h> #include <asm/cputable.h> #include <asm/cacheflush.h> #include <asm/smp.h> @@ -58,27 +59,19 @@ static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); } -static void assert_slb_exists(unsigned long ea) +static void assert_slb_presence(bool present, unsigned long ea) { #ifdef CONFIG_DEBUG_VM unsigned long tmp; WARN_ON_ONCE(mfmsr() & MSR_EE); - asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0"); - WARN_ON(tmp == 0); -#endif -} - -static void assert_slb_notexists(unsigned long ea) -{ -#ifdef CONFIG_DEBUG_VM - unsigned long tmp; + if (!cpu_has_feature(CPU_FTR_ARCH_206)) + return; - WARN_ON_ONCE(mfmsr() & MSR_EE); + asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0"); - asm volatile("slbfee. 
%0, %1" : "=r"(tmp) : "r"(ea) : "cr0"); - WARN_ON(tmp != 0); + WARN_ON(present == (tmp == 0)); #endif } @@ -114,7 +107,7 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize, */ slb_shadow_update(ea, ssize, flags, index); - assert_slb_notexists(ea); + assert_slb_presence(false, ea); asm volatile("slbmte %0,%1" : : "r" (mk_vsid_data(ea, ssize, flags)), "r" (mk_esid_data(ea, ssize, index)) @@ -137,7 +130,7 @@ void __slb_restore_bolted_realmode(void) "r" (be64_to_cpu(p->save_area[index].esid))); } - assert_slb_exists(local_paca->kstack); + assert_slb_presence(true, local_paca->kstack); } /* @@ -185,7 +178,7 @@ void slb_flush_and_restore_bolted(void) :: "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].vsid)), "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].esid)) : "memory"); - assert_slb_exists(get_paca()->kstack); + assert_slb_presence(true, get_paca()->kstack); get_paca()->slb_cache_ptr = 0; @@ -443,9 +436,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) ea = (unsigned long) get_paca()->slb_cache[i] << SID_SHIFT; /* - * Could assert_slb_exists here, but hypervisor - * or machine check could have come in and - * removed the entry at this point. + * Could assert_slb_presence(true) here, but + * hypervisor or machine check could have come + * in and removed the entry at this point. */ slbie_data = ea; @@ -676,7 +669,7 @@ static long slb_insert_entry(unsigned long ea, unsigned long context, * User preloads should add isync afterwards in case the kernel * accesses user memory before it returns to userspace with rfid. */ - assert_slb_notexists(ea); + assert_slb_presence(false, ea); asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)); barrier(); @@ -715,7 +708,7 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id) return -EFAULT; if (ea < H_VMALLOC_END) - flags = get_paca()->vmalloc_sllp; + flags = local_paca->vmalloc_sllp; else flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp; } else { diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index 3327551c8b47..5e4178790dee 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -214,7 +214,7 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr, return 0; } - if (!access_ok(VERIFY_READ, map, (len >> PAGE_SHIFT) * sizeof(u32))) + if (!access_ok(map, (len >> PAGE_SHIFT) * sizeof(u32))) return -EFAULT; down_write(&mm->mmap_sem); diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index 7fd20c52a8ec..9ed90064f542 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -70,6 +70,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) std r15,EX_TLB_R15(r12) std r10,EX_TLB_CR(r12) #ifdef CONFIG_PPC_FSL_BOOK3E +START_BTB_FLUSH_SECTION + mfspr r11, SPRN_SRR1 + andi. r10,r11,MSR_PR + beq 1f + BTB_FLUSH(r10) +1: +END_BTB_FLUSH_SECTION std r7,EX_TLB_R7(r12) #endif TLB_MISS_PROLOG_STATS |