From caff0710ebf6f2c44cbd2b8b31fd6329148bed2e Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 26 Sep 2006 10:52:31 +0200 Subject: [PATCH] initialize end of memory variables as early as possible While an earlier patch already did a small step into that direction, this patch moves initialization of all memory end variables to as early as possible, so that dependent code doesn't need to check whether these variables have already been set. Also, remove a misleading (perhaps just outdated) comment, and make static a variable only used in a single file. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- arch/x86_64/mm/init.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86_64/mm') diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index d14fb2dfbfc4..d40134bd6399 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -597,12 +597,6 @@ void __init mem_init(void) pci_iommu_alloc(); - /* How many end-of-memory variables you have, grandma! */ - max_low_pfn = end_pfn; - max_pfn = end_pfn; - num_physpages = end_pfn; - high_memory = (void *) __va(end_pfn * PAGE_SIZE); - /* clear the zero-page */ memset(empty_zero_page, 0, PAGE_SIZE); -- cgit From 2c8c0e6b8d7700a990da8d24eff767f9ca223b96 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:32 +0200 Subject: [PATCH] Convert x86-64 to early param Instead of hackish manual parsing Requires earlier i386 patchkit, but also fixes i386 early_printk again. I removed some obsolete really early parameters which didn't do anything useful. Also made a few parameters that needed it early (mostly oops printing setup) Also removed one panic check that wasn't visible without early console anyways (the early console is now initialized after that panic) This cleans up a lot of code. Signed-off-by: Andi Kleen --- arch/x86_64/mm/numa.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/x86_64/mm') diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index b2fac14baac0..d64d6d93d04b 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -348,9 +348,10 @@ void __init paging_init(void) } } -/* [numa=off] */ -__init int numa_setup(char *opt) +static __init int numa_setup(char *opt) { + if (!opt) + return -EINVAL; if (!strncmp(opt,"off",3)) numa_off = 1; #ifdef CONFIG_NUMA_EMU @@ -366,9 +367,11 @@ __init int numa_setup(char *opt) if (!strncmp(opt,"hotadd=", 7)) hotadd_percent = simple_strtoul(opt+7, NULL, 10); #endif - return 1; + return 0; } +early_param("numa", numa_setup); + /* * Setup early cpu_to_node. * -- cgit From c31fbb1ad890b11f037c16496e53f28877c12722 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:33 +0200 Subject: [PATCH] Clean up acpi_numa variable Move it into srat.c No need to clutter up setup.c for it And remove use in setup.c completely - it only guarded a printk which can be done unconditionally. Signed-off-by: Andi Kleen --- arch/x86_64/mm/srat.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86_64/mm') diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c index 502fce65e96a..ca10701e7a90 100644 --- a/arch/x86_64/mm/srat.c +++ b/arch/x86_64/mm/srat.c @@ -21,6 +21,8 @@ #include #include +int acpi_numa __initdata; + #if (defined(CONFIG_ACPI_HOTPLUG_MEMORY) || \ defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)) \ && !defined(CONFIG_MEMORY_HOTPLUG) -- cgit From dd2994f619752fb731f21c89ad16536dd6673948 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:33 +0200 Subject: [PATCH] Add sparse annotations to quiet sparse in arch/x86_64/mm/fault.c Fixes linux/arch/x86_64/mm/fault.c:125:7: warning: incorrect type in argument 1 (different address spaces) linux/arch/x86_64/mm/fault.c:125:7: expected void [noderef] * linux/arch/x86_64/mm/fault.c:125:7: got unsigned char *[assigned] instr linux/arch/x86_64/mm/fault.c:163:8: warning: incorrect type in argument 1 (different address spaces) linux/arch/x86_64/mm/fault.c:163:8: expected void [noderef] * linux/arch/x86_64/mm/fault.c:163:8: got unsigned char *[assigned] instr linux/arch/x86_64/mm/fault.c:179:9: warning: incorrect type in argument 1 (different address spaces) linux/arch/x86_64/mm/fault.c:179:9: expected void [noderef] * linux/arch/x86_64/mm/fault.c:179:9: got unsigned long * Signed-off-by: Andi Kleen --- arch/x86_64/mm/fault.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86_64/mm') diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index ac8ea66ccb94..449a437f5d70 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -102,7 +102,7 @@ void bust_spinlocks(int yes) static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, unsigned long error_code) { - unsigned char *instr; + unsigned char __user *instr; int scan_more = 1; int prefetch = 0; unsigned char *max_instr; @@ -111,7 +111,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, if (error_code & PF_INSTR) return 0; - instr = (unsigned char *)convert_rip_to_linear(current, regs); + instr = (unsigned char __user *)convert_rip_to_linear(current, regs); max_instr = instr + 15; if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) @@ -122,7 +122,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, unsigned char instr_hi; unsigned char instr_lo; - if (__get_user(opcode, instr)) + if (__get_user(opcode, (char __user *)instr)) break; instr_hi = opcode & 0xf0; @@ -160,7 +160,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ scan_more = 0; - if (__get_user(opcode, instr)) + if (__get_user(opcode, (char __user *)instr)) break; prefetch = (instr_lo == 0xF) && (opcode == 0x0D || opcode == 0x18); @@ -176,7 +176,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, static int bad_address(void *p) { unsigned long dummy; - return __get_user(dummy, (unsigned long *)p); + return __get_user(dummy, (unsigned long __user *)p); } void dump_pagetable(unsigned long address) -- cgit From 273819a2d982faace30e587b86a0683882251fe7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:35 +0200 Subject: [PATCH] make fault notifier unconditional and export it It's needed for external debuggers and overhead is very small. Also make the actual notifier chain they use static Cc: jbeulich@novell.com Signed-off-by: Andi Kleen --- arch/x86_64/mm/fault.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'arch/x86_64/mm') diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 449a437f5d70..3ae3e7bb2fce 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -40,8 +40,7 @@ #define PF_RSVD (1<<3) #define PF_INSTR (1<<4) -#ifdef CONFIG_KPROBES -ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); +static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); /* Hook to register for page fault notifications */ int register_page_fault_notifier(struct notifier_block *nb) @@ -49,11 +48,13 @@ int register_page_fault_notifier(struct notifier_block *nb) vmalloc_sync_all(); return atomic_notifier_chain_register(¬ify_page_fault_chain, nb); } +EXPORT_SYMBOL_GPL(register_page_fault_notifier); int unregister_page_fault_notifier(struct notifier_block *nb) { return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); } +EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); static inline int notify_page_fault(enum die_val val, const char *str, struct pt_regs *regs, long err, int trap, int sig) @@ -67,13 +68,6 @@ static inline int notify_page_fault(enum die_val val, const char *str, }; return atomic_notifier_call_chain(¬ify_page_fault_chain, val, &args); } -#else -static inline int notify_page_fault(enum die_val val, const char *str, - struct pt_regs *regs, long err, int trap, int sig) -{ - return NOTIFY_DONE; -} -#endif void bust_spinlocks(int yes) { -- cgit From 6ad916581181a105d7832a7dec9e1eb58f7a1621 Mon Sep 17 00:00:00 2001 From: Keith Mannthey Date: Tue, 26 Sep 2006 10:52:36 +0200 Subject: [PATCH] x86_64 kernel mapping fix Fix for the x86_64 kernel mapping code. Without this patch the update path only inits one pmd_page worth of memory and tramples any entries on it. now the calling convention to phys_pmd_init and phys_init is to always pass a [pmd/pud] page not an offset within a page. Signed-off-by: Keith Mannthey Signed-off-by: Andi Kleen Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton --- arch/x86_64/mm/init.c | 51 ++++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 25 deletions(-) (limited to 'arch/x86_64/mm') diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index d40134bd6399..984155b75e4c 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -250,12 +250,13 @@ __init void early_iounmap(void *addr, unsigned long size) } static void __meminit -phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end) +phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) { - int i; + int i = pmd_index(address); - for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) { + for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) { unsigned long entry; + pmd_t *pmd = pmd_page + pmd_index(address); if (address >= end) { if (!after_bootmem) @@ -263,6 +264,10 @@ phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end) set_pmd(pmd, __pmd(0)); break; } + + if (pmd_val(*pmd)) + continue; + entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address; entry &= __supported_pte_mask; set_pmd(pmd, __pmd(entry)); @@ -272,45 +277,41 @@ phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end) static void __meminit phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) { - pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address)); - - if (pmd_none(*pmd)) { - spin_lock(&init_mm.page_table_lock); - phys_pmd_init(pmd, address, end); - spin_unlock(&init_mm.page_table_lock); - __flush_tlb_all(); - } + pmd_t *pmd = pmd_offset(pud,0); + spin_lock(&init_mm.page_table_lock); + phys_pmd_init(pmd, address, end); + spin_unlock(&init_mm.page_table_lock); + __flush_tlb_all(); } -static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) +static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) { - long i = pud_index(address); - - pud = pud + i; + int i = pud_index(addr); - if (after_bootmem && pud_val(*pud)) { - phys_pmd_update(pud, address, end); - return; - } - for (; i < PTRS_PER_PUD; pud++, i++) { + for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) { int map; - unsigned long paddr, pmd_phys; + unsigned long pmd_phys; + pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; - paddr = (address & PGDIR_MASK) + i*PUD_SIZE; - if (paddr >= end) + if (addr >= end) break; - if (!after_bootmem && !e820_any_mapped(paddr, paddr+PUD_SIZE, 0)) { + if (!after_bootmem && !e820_any_mapped(addr,addr+PUD_SIZE,0)) { set_pud(pud, __pud(0)); continue; } + if (pud_val(*pud)) { + phys_pmd_update(pud, addr, end); + continue; + } + pmd = alloc_low_page(&map, &pmd_phys); spin_lock(&init_mm.page_table_lock); set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); - phys_pmd_init(pmd, paddr, end); + phys_pmd_init(pmd, addr, end); spin_unlock(&init_mm.page_table_lock); unmap_low_page(map); } -- cgit From 1164c9994fe37d5b7035a5cf9328c98dd38af7b1 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 26 Sep 2006 10:52:37 +0200 Subject: [PATCH] make numa_emulation() __init Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- arch/x86_64/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64/mm') diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index d64d6d93d04b..322bf45fc36a 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -225,7 +225,7 @@ void __init numa_init_array(void) int numa_fake __initdata = 0; /* Numa emulation */ -static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn) +static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) { int i; struct bootnode nodes[MAX_NUMNODES]; -- cgit From d3cf7f061521c78ad62e275eb6fbdc8f43fc75a7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:37 +0200 Subject: [PATCH] Remove bogus warning from early_ioremap It is correct for its only caller right now, but not for possible future others. Signed-off-by: Andi Kleen --- arch/x86_64/mm/init.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86_64/mm') diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 984155b75e4c..53b49c86d7e0 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -229,7 +229,6 @@ __init void *early_ioremap(unsigned long addr, unsigned long size) /* actually usually some more */ if (size >= LARGE_PAGE_SIZE) { - printk("SMBIOS area too long %lu\n", size); return NULL; } set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); -- cgit From df992848f5aa803fcacd2c5e7d67034bb89e3fa3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:37 +0200 Subject: [PATCH] Fix pte_exec/mkexec and use it in change_page_attr() Fix the pte_exec/mkexec page table accessor functions to really use the NX bit. Previously they only checked the USER bit, but weren't actually used for anything. Then use them in change_page_attr() to manipulate the NX bit properly. Signed-off-by: Andi Kleen --- arch/x86_64/mm/pageattr.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86_64/mm') diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c index 2685b1f3671c..01591b649f4e 100644 --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c @@ -190,10 +190,12 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot) * lowmem */ if (__pa(address) < KERNEL_TEXT_SIZE) { unsigned long addr2; - pgprot_t prot2 = prot; + pgprot_t prot2; addr2 = __START_KERNEL_map + __pa(address); - pgprot_val(prot2) &= ~_PAGE_NX; - err = __change_page_attr(addr2, pfn, prot2, PAGE_KERNEL_EXEC); + /* Make sure the kernel mappings stay executable */ + prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot))); + err = __change_page_attr(addr2, pfn, prot2, + PAGE_KERNEL_EXEC); } } up_write(&init_mm.mmap_sem); -- cgit From 5e6b0bfe5b452957b7be4b6ef181cd41880f8359 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:37 +0200 Subject: [PATCH] Use proper accessors to change PSE bits in change_page_attr() Use normal pte accessors in change_page_attr() to access the PSE bits. Signed-off-by: Andi Kleen --- arch/x86_64/mm/pageattr.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'arch/x86_64/mm') diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c index 01591b649f4e..3e231d762aaa 100644 --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c @@ -108,8 +108,8 @@ static void revert_page(unsigned long address, pgprot_t ref_prot) BUG_ON(pud_none(*pud)); pmd = pmd_offset(pud, address); BUG_ON(pmd_val(*pmd) & _PAGE_PSE); - pgprot_val(ref_prot) |= _PAGE_PSE; large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot); + large_pte = pte_mkhuge(large_pte); set_pte((pte_t *)pmd, large_pte); } @@ -119,32 +119,28 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, { pte_t *kpte; struct page *kpte_page; - unsigned kpte_flags; pgprot_t ref_prot2; kpte = lookup_address(address); if (!kpte) return 0; kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK); - kpte_flags = pte_val(*kpte); if (pgprot_val(prot) != pgprot_val(ref_prot)) { - if ((kpte_flags & _PAGE_PSE) == 0) { + if (!pte_huge(*kpte)) { set_pte(kpte, pfn_pte(pfn, prot)); } else { /* * split_large_page will take the reference for this * change_page_attr on the split page. */ - struct page *split; - ref_prot2 = __pgprot(pgprot_val(pte_pgprot(*lookup_address(address))) & ~(1<<_PAGE_BIT_PSE)); - + ref_prot2 = pte_pgprot(pte_clrhuge(*kpte)); split = split_large_page(address, prot, ref_prot2); if (!split) return -ENOMEM; - set_pte(kpte,mk_pte(split, ref_prot2)); + set_pte(kpte, mk_pte(split, ref_prot2)); kpte_page = split; - } + } page_private(kpte_page)++; - } else if ((kpte_flags & _PAGE_PSE) == 0) { + } else if (!pte_huge(*kpte)) { set_pte(kpte, pfn_pte(pfn, ref_prot)); BUG_ON(page_private(kpte_page) == 0); page_private(kpte_page)--; -- cgit From 0637a70a5db98182d9ad3d6ae1ee30acf20afde9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:41 +0200 Subject: [PATCH] x86: Allow disabling early pci scans with pci=noearly or disallowing conf1 Some buggy systems can machine check when config space accesses happen for some non existent devices. i386/x86-64 do some early device scans that might trigger this. Allow pci=noearly to disable this. Also when type 1 is disabling also don't do any early accesses which are always type1. This moves the pci= configuration parsing to be a early parameter. I don't think this can break anything because it only changes a single global that is only used by PCI. Cc: gregkh@suse.de Cc: Trammell Hudson Signed-off-by: Andi Kleen --- arch/x86_64/mm/k8topology.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86_64/mm') diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c index 7c45c2d2b8b2..5cf594f9230d 100644 --- a/arch/x86_64/mm/k8topology.c +++ b/arch/x86_64/mm/k8topology.c @@ -54,6 +54,9 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) nodes_clear(nodes_parsed); + if (!early_pci_allowed()) + return -1; + nb = find_northbridge(); if (nb < 0) return nb; -- cgit