Diffstat (limited to 'mm')
-rw-r--r--  mm/gup_benchmark.c  |   8
-rw-r--r--  mm/huge_memory.c    |  38
-rw-r--r--  mm/hugetlb.c        |  51
-rw-r--r--  mm/kasan/common.c   |  36
-rw-r--r--  mm/memcontrol.c     |  37
-rw-r--r--  mm/memory.c         | 136
-rw-r--r--  mm/memory_hotplug.c |  31
-rw-r--r--  mm/mempolicy.c      |  10
-rw-r--r--  mm/memremap.c       |   2
-rw-r--r--  mm/migrate.c        |  23
-rw-r--r--  mm/mmap.c           |   6
-rw-r--r--  mm/oom_kill.c       |   2
-rw-r--r--  mm/page-writeback.c |  10
-rw-r--r--  mm/page_alloc.c     |  61
-rw-r--r--  mm/shmem.c          |   7
-rw-r--r--  mm/slab.c           |   4
-rw-r--r--  mm/slab_common.c    |   3
-rw-r--r--  mm/slub.c           |   2
-rw-r--r--  mm/sparse.c         |   9
-rw-r--r--  mm/vmalloc.c        | 133
-rw-r--r--  mm/vmscan.c         |   2
-rw-r--r--  mm/zsmalloc.c       |   5
22 files changed, 373 insertions, 243 deletions
diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c index 7dd602d7f8db..ad9d5b1c4473 100644 --- a/mm/gup_benchmark.c +++ b/mm/gup_benchmark.c @@ -26,6 +26,7 @@ static int __gup_benchmark_ioctl(unsigned int cmd, unsigned long i, nr_pages, addr, next; int nr; struct page **pages; + int ret = 0; if (gup->size > ULONG_MAX) return -EINVAL; @@ -63,7 +64,9 @@ static int __gup_benchmark_ioctl(unsigned int cmd, NULL); break; default: - return -1; + kvfree(pages); + ret = -EINVAL; + goto out; } if (nr <= 0) @@ -85,7 +88,8 @@ static int __gup_benchmark_ioctl(unsigned int cmd, gup->put_delta_usec = ktime_us_delta(end_time, start_time); kvfree(pages); - return 0; +out: + return ret; } static long gup_benchmark_ioctl(struct file *filep, unsigned int cmd, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 41a0fbddc96b..a88093213674 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -527,13 +527,13 @@ void prep_transhuge_page(struct page *page) set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR); } -static unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long len, +static unsigned long __thp_get_unmapped_area(struct file *filp, + unsigned long addr, unsigned long len, loff_t off, unsigned long flags, unsigned long size) { - unsigned long addr; loff_t off_end = off + len; loff_t off_align = round_up(off, size); - unsigned long len_pad; + unsigned long len_pad, ret; if (off_end <= off_align || (off_end - off_align) < size) return 0; @@ -542,30 +542,40 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long le if (len_pad < len || (off + len_pad) < off) return 0; - addr = current->mm->get_unmapped_area(filp, 0, len_pad, + ret = current->mm->get_unmapped_area(filp, addr, len_pad, off >> PAGE_SHIFT, flags); - if (IS_ERR_VALUE(addr)) + + /* + * The failure might be due to length padding. The caller will retry + * without the padding. + */ + if (IS_ERR_VALUE(ret)) return 0; - addr += (off - addr) & (size - 1); - return addr; + /* + * Do not try to align to THP boundary if allocation at the address + * hint succeeds. 
+ */ + if (ret == addr) + return addr; + + ret += (off - ret) & (size - 1); + return ret; } unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { + unsigned long ret; loff_t off = (loff_t)pgoff << PAGE_SHIFT; - if (addr) - goto out; if (!IS_DAX(filp->f_mapping->host) || !IS_ENABLED(CONFIG_FS_DAX_PMD)) goto out; - addr = __thp_get_unmapped_area(filp, len, off, flags, PMD_SIZE); - if (addr) - return addr; - - out: + ret = __thp_get_unmapped_area(filp, addr, len, off, flags, PMD_SIZE); + if (ret) + return ret; +out: return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); } EXPORT_SYMBOL_GPL(thp_get_unmapped_area); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ac65bb5e38ac..dd8737a94bec 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -27,6 +27,7 @@ #include <linux/swapops.h> #include <linux/jhash.h> #include <linux/numa.h> +#include <linux/llist.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -1136,7 +1137,7 @@ static inline void ClearPageHugeTemporary(struct page *page) page[2].mapping = NULL; } -void free_huge_page(struct page *page) +static void __free_huge_page(struct page *page) { /* * Can't pass hstate in here because it is called from the @@ -1199,6 +1200,54 @@ void free_huge_page(struct page *page) spin_unlock(&hugetlb_lock); } +/* + * As free_huge_page() can be called from a non-task context, we have + * to defer the actual freeing in a workqueue to prevent potential + * hugetlb_lock deadlock. + * + * free_hpage_workfn() locklessly retrieves the linked list of pages to + * be freed and frees them one-by-one. As the page->mapping pointer is + * going to be cleared in __free_huge_page() anyway, it is reused as the + * llist_node structure of a lockless linked list of huge pages to be freed. + */ +static LLIST_HEAD(hpage_freelist); + +static void free_hpage_workfn(struct work_struct *work) +{ + struct llist_node *node; + struct page *page; + + node = llist_del_all(&hpage_freelist); + + while (node) { + page = container_of((struct address_space **)node, + struct page, mapping); + node = node->next; + __free_huge_page(page); + } +} +static DECLARE_WORK(free_hpage_work, free_hpage_workfn); + +void free_huge_page(struct page *page) +{ + /* + * Defer freeing if in non-task context to avoid hugetlb_lock deadlock. + */ + if (!in_task()) { + /* + * Only call schedule_work() if hpage_freelist is previously + * empty. Otherwise, schedule_work() had been called but the + * workfn hasn't retrieved the list yet. 
+ */ + if (llist_add((struct llist_node *)&page->mapping, + &hpage_freelist)) + schedule_work(&free_hpage_work); + return; + } + + __free_huge_page(page); +} + static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) { INIT_LIST_HEAD(&page->lru); diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 2fa710bb6358..c15d8ae68c96 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -778,15 +778,17 @@ static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr, return 0; } -int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area) +int kasan_populate_vmalloc(unsigned long addr, unsigned long size) { unsigned long shadow_start, shadow_end; int ret; - shadow_start = (unsigned long)kasan_mem_to_shadow(area->addr); + if (!is_vmalloc_or_module_addr((void *)addr)) + return 0; + + shadow_start = (unsigned long)kasan_mem_to_shadow((void *)addr); shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE); - shadow_end = (unsigned long)kasan_mem_to_shadow(area->addr + - area->size); + shadow_end = (unsigned long)kasan_mem_to_shadow((void *)addr + size); shadow_end = ALIGN(shadow_end, PAGE_SIZE); ret = apply_to_page_range(&init_mm, shadow_start, @@ -797,10 +799,6 @@ int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area) flush_cache_vmap(shadow_start, shadow_end); - kasan_unpoison_shadow(area->addr, requested_size); - - area->flags |= VM_KASAN; - /* * We need to be careful about inter-cpu effects here. Consider: * @@ -843,12 +841,23 @@ int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area) * Poison the shadow for a vmalloc region. Called as part of the * freeing process at the time the region is freed. */ -void kasan_poison_vmalloc(void *start, unsigned long size) +void kasan_poison_vmalloc(const void *start, unsigned long size) { + if (!is_vmalloc_or_module_addr(start)) + return; + size = round_up(size, KASAN_SHADOW_SCALE_SIZE); kasan_poison_shadow(start, size, KASAN_VMALLOC_INVALID); } +void kasan_unpoison_vmalloc(const void *start, unsigned long size) +{ + if (!is_vmalloc_or_module_addr(start)) + return; + + kasan_unpoison_shadow(start, size); +} + static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr, void *unused) { @@ -948,6 +957,7 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, { void *shadow_start, *shadow_end; unsigned long region_start, region_end; + unsigned long size; region_start = ALIGN(start, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE); region_end = ALIGN_DOWN(end, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE); @@ -970,9 +980,11 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, shadow_end = kasan_mem_to_shadow((void *)region_end); if (shadow_end > shadow_start) { - apply_to_page_range(&init_mm, (unsigned long)shadow_start, - (unsigned long)(shadow_end - shadow_start), - kasan_depopulate_vmalloc_pte, NULL); + size = shadow_end - shadow_start; + apply_to_existing_page_range(&init_mm, + (unsigned long)shadow_start, + size, kasan_depopulate_vmalloc_pte, + NULL); flush_tlb_kernel_range((unsigned long)shadow_start, (unsigned long)shadow_end); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c5b5f74cfd4d..6c83cf4ed970 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3287,49 +3287,34 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, } } -static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg, bool slab_only) +static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg) { - unsigned long stat[MEMCG_NR_STAT]; + 
unsigned long stat[MEMCG_NR_STAT] = {0}; struct mem_cgroup *mi; int node, cpu, i; - int min_idx, max_idx; - - if (slab_only) { - min_idx = NR_SLAB_RECLAIMABLE; - max_idx = NR_SLAB_UNRECLAIMABLE; - } else { - min_idx = 0; - max_idx = MEMCG_NR_STAT; - } - - for (i = min_idx; i < max_idx; i++) - stat[i] = 0; for_each_online_cpu(cpu) - for (i = min_idx; i < max_idx; i++) + for (i = 0; i < MEMCG_NR_STAT; i++) stat[i] += per_cpu(memcg->vmstats_percpu->stat[i], cpu); for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) - for (i = min_idx; i < max_idx; i++) + for (i = 0; i < MEMCG_NR_STAT; i++) atomic_long_add(stat[i], &mi->vmstats[i]); - if (!slab_only) - max_idx = NR_VM_NODE_STAT_ITEMS; - for_each_node(node) { struct mem_cgroup_per_node *pn = memcg->nodeinfo[node]; struct mem_cgroup_per_node *pi; - for (i = min_idx; i < max_idx; i++) + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) stat[i] = 0; for_each_online_cpu(cpu) - for (i = min_idx; i < max_idx; i++) + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) stat[i] += per_cpu( pn->lruvec_stat_cpu->count[i], cpu); for (pi = pn; pi; pi = parent_nodeinfo(pi, node)) - for (i = min_idx; i < max_idx; i++) + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) atomic_long_add(stat[i], &pi->lruvec_stat[i]); } } @@ -3403,13 +3388,9 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg) parent = root_mem_cgroup; /* - * Deactivate and reparent kmem_caches. Then flush percpu - * slab statistics to have precise values at the parent and - * all ancestor levels. It's required to keep slab stats - * accurate after the reparenting of kmem_caches. + * Deactivate and reparent kmem_caches. */ memcg_deactivate_kmem_caches(memcg, parent); - memcg_flush_percpu_vmstats(memcg, true); kmemcg_id = memcg->kmemcg_id; BUG_ON(kmemcg_id < 0); @@ -4913,7 +4894,7 @@ static void mem_cgroup_free(struct mem_cgroup *memcg) * Flush percpu vmstats and vmevents to guarantee the value correctness * on parent's and all ancestor levels. */ - memcg_flush_percpu_vmstats(memcg, false); + memcg_flush_percpu_vmstats(memcg); memcg_flush_percpu_vmevents(memcg); __mem_cgroup_free(memcg); } diff --git a/mm/memory.c b/mm/memory.c index 606da187d1de..45442d9a4f52 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2021,26 +2021,34 @@ EXPORT_SYMBOL(vm_iomap_memory); static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { pte_t *pte; - int err; + int err = 0; spinlock_t *uninitialized_var(ptl); - pte = (mm == &init_mm) ? - pte_alloc_kernel(pmd, addr) : - pte_alloc_map_lock(mm, pmd, addr, &ptl); - if (!pte) - return -ENOMEM; + if (create) { + pte = (mm == &init_mm) ? + pte_alloc_kernel(pmd, addr) : + pte_alloc_map_lock(mm, pmd, addr, &ptl); + if (!pte) + return -ENOMEM; + } else { + pte = (mm == &init_mm) ? 
+ pte_offset_kernel(pmd, addr) : + pte_offset_map_lock(mm, pmd, addr, &ptl); + } BUG_ON(pmd_huge(*pmd)); arch_enter_lazy_mmu_mode(); do { - err = fn(pte++, addr, data); - if (err) - break; + if (create || !pte_none(*pte)) { + err = fn(pte++, addr, data); + if (err) + break; + } } while (addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); @@ -2052,77 +2060,95 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { pmd_t *pmd; unsigned long next; - int err; + int err = 0; BUG_ON(pud_huge(*pud)); - pmd = pmd_alloc(mm, pud, addr); - if (!pmd) - return -ENOMEM; + if (create) { + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return -ENOMEM; + } else { + pmd = pmd_offset(pud, addr); + } do { next = pmd_addr_end(addr, end); - err = apply_to_pte_range(mm, pmd, addr, next, fn, data); - if (err) - break; + if (create || !pmd_none_or_clear_bad(pmd)) { + err = apply_to_pte_range(mm, pmd, addr, next, fn, data, + create); + if (err) + break; + } } while (pmd++, addr = next, addr != end); return err; } static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { pud_t *pud; unsigned long next; - int err; + int err = 0; - pud = pud_alloc(mm, p4d, addr); - if (!pud) - return -ENOMEM; + if (create) { + pud = pud_alloc(mm, p4d, addr); + if (!pud) + return -ENOMEM; + } else { + pud = pud_offset(p4d, addr); + } do { next = pud_addr_end(addr, end); - err = apply_to_pmd_range(mm, pud, addr, next, fn, data); - if (err) - break; + if (create || !pud_none_or_clear_bad(pud)) { + err = apply_to_pmd_range(mm, pud, addr, next, fn, data, + create); + if (err) + break; + } } while (pud++, addr = next, addr != end); return err; } static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { p4d_t *p4d; unsigned long next; - int err; + int err = 0; - p4d = p4d_alloc(mm, pgd, addr); - if (!p4d) - return -ENOMEM; + if (create) { + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return -ENOMEM; + } else { + p4d = p4d_offset(pgd, addr); + } do { next = p4d_addr_end(addr, end); - err = apply_to_pud_range(mm, p4d, addr, next, fn, data); - if (err) - break; + if (create || !p4d_none_or_clear_bad(p4d)) { + err = apply_to_pud_range(mm, p4d, addr, next, fn, data, + create); + if (err) + break; + } } while (p4d++, addr = next, addr != end); return err; } -/* - * Scan a region of virtual memory, filling in page tables as necessary - * and calling a provided function on each leaf page table. 
- */ -int apply_to_page_range(struct mm_struct *mm, unsigned long addr, - unsigned long size, pte_fn_t fn, void *data) +static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr, + unsigned long size, pte_fn_t fn, + void *data, bool create) { pgd_t *pgd; unsigned long next; unsigned long end = addr + size; - int err; + int err = 0; if (WARN_ON(addr >= end)) return -EINVAL; @@ -2130,16 +2156,42 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); - err = apply_to_p4d_range(mm, pgd, addr, next, fn, data); + if (!create && pgd_none_or_clear_bad(pgd)) + continue; + err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create); if (err) break; } while (pgd++, addr = next, addr != end); return err; } + +/* + * Scan a region of virtual memory, filling in page tables as necessary + * and calling a provided function on each leaf page table. + */ +int apply_to_page_range(struct mm_struct *mm, unsigned long addr, + unsigned long size, pte_fn_t fn, void *data) +{ + return __apply_to_page_range(mm, addr, size, fn, data, true); +} EXPORT_SYMBOL_GPL(apply_to_page_range); /* + * Scan a region of virtual memory, calling a provided function on + * each leaf page table where it exists. + * + * Unlike apply_to_page_range, this does _not_ fill in page tables + * where they are absent. + */ +int apply_to_existing_page_range(struct mm_struct *mm, unsigned long addr, + unsigned long size, pte_fn_t fn, void *data) +{ + return __apply_to_page_range(mm, addr, size, fn, data, false); +} +EXPORT_SYMBOL_GPL(apply_to_existing_page_range); + +/* * handle_pte_fault chooses page fault handler according to an entry which was * read non-atomically. Before making any commitment, on those architectures * or configurations (e.g. 
i386 with PAE) which might give a mix of unmatched diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 55ac23ef11c1..a91a072f2b2c 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -483,8 +483,9 @@ static void update_pgdat_span(struct pglist_data *pgdat) pgdat->node_spanned_pages = node_end_pfn - node_start_pfn; } -static void __remove_zone(struct zone *zone, unsigned long start_pfn, - unsigned long nr_pages) +void __ref remove_pfn_range_from_zone(struct zone *zone, + unsigned long start_pfn, + unsigned long nr_pages) { struct pglist_data *pgdat = zone->zone_pgdat; unsigned long flags; @@ -499,28 +500,30 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn, return; #endif + clear_zone_contiguous(zone); + pgdat_resize_lock(zone->zone_pgdat, &flags); shrink_zone_span(zone, start_pfn, start_pfn + nr_pages); update_pgdat_span(pgdat); pgdat_resize_unlock(zone->zone_pgdat, &flags); + + set_zone_contiguous(zone); } -static void __remove_section(struct zone *zone, unsigned long pfn, - unsigned long nr_pages, unsigned long map_offset, - struct vmem_altmap *altmap) +static void __remove_section(unsigned long pfn, unsigned long nr_pages, + unsigned long map_offset, + struct vmem_altmap *altmap) { struct mem_section *ms = __nr_to_section(pfn_to_section_nr(pfn)); if (WARN_ON_ONCE(!valid_section(ms))) return; - __remove_zone(zone, pfn, nr_pages); sparse_remove_section(ms, pfn, nr_pages, map_offset, altmap); } /** - * __remove_pages() - remove sections of pages from a zone - * @zone: zone from which pages need to be removed + * __remove_pages() - remove sections of pages * @pfn: starting pageframe (must be aligned to start of a section) * @nr_pages: number of pages to remove (must be multiple of section size) * @altmap: alternative device page map or %NULL if default memmap is used @@ -530,16 +533,14 @@ static void __remove_section(struct zone *zone, unsigned long pfn, * sure that pages are marked reserved and zones are adjust properly by * calling offline_pages(). 
*/ -void __remove_pages(struct zone *zone, unsigned long pfn, - unsigned long nr_pages, struct vmem_altmap *altmap) +void __remove_pages(unsigned long pfn, unsigned long nr_pages, + struct vmem_altmap *altmap) { unsigned long map_offset = 0; unsigned long nr, start_sec, end_sec; map_offset = vmem_altmap_offset(altmap); - clear_zone_contiguous(zone); - if (check_pfn_span(pfn, nr_pages, "remove")) return; @@ -551,13 +552,11 @@ void __remove_pages(struct zone *zone, unsigned long pfn, cond_resched(); pfns = min(nr_pages, PAGES_PER_SECTION - (pfn & ~PAGE_SECTION_MASK)); - __remove_section(zone, pfn, pfns, map_offset, altmap); + __remove_section(pfn, pfns, map_offset, altmap); pfn += pfns; nr_pages -= pfns; map_offset = 0; } - - set_zone_contiguous(zone); } int set_online_page_callback(online_page_callback_t callback) @@ -869,6 +868,7 @@ failed_addition: (unsigned long long) pfn << PAGE_SHIFT, (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1); memory_notify(MEM_CANCEL_ONLINE, &arg); + remove_pfn_range_from_zone(zone, pfn, nr_pages); mem_hotplug_done(); return ret; } @@ -1628,6 +1628,7 @@ static int __ref __offline_pages(unsigned long start_pfn, writeback_set_ratelimit(); memory_notify(MEM_OFFLINE, &arg); + remove_pfn_range_from_zone(zone, start_pfn, nr_pages); mem_hotplug_done(); return 0; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 067cf7d3daf5..b2920ae87a61 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2148,18 +2148,22 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, nmask = policy_nodemask(gfp, pol); if (!nmask || node_isset(hpage_node, *nmask)) { mpol_cond_put(pol); + /* + * First, try to allocate THP only on local node, but + * don't reclaim unnecessarily, just compact. + */ page = __alloc_pages_node(hpage_node, - gfp | __GFP_THISNODE, order); + gfp | __GFP_THISNODE | __GFP_NORETRY, order); /* * If hugepage allocations are configured to always * synchronous compact or the vma has been madvised * to prefer hugepage backing, retry allowing remote - * memory as well. + * memory with both reclaim and compact as well. */ if (!page && (gfp & __GFP_DIRECT_RECLAIM)) page = __alloc_pages_node(hpage_node, - gfp | __GFP_NORETRY, order); + gfp, order); goto out; } diff --git a/mm/memremap.c b/mm/memremap.c index 03ccbdfeb697..c51c6bd2fe34 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -120,7 +120,7 @@ void memunmap_pages(struct dev_pagemap *pgmap) mem_hotplug_begin(); if (pgmap->type == MEMORY_DEVICE_PRIVATE) { - __remove_pages(page_zone(first_page), PHYS_PFN(res->start), + __remove_pages(PHYS_PFN(res->start), PHYS_PFN(resource_size(res)), NULL); } else { arch_remove_memory(nid, res->start, resource_size(res), diff --git a/mm/migrate.c b/mm/migrate.c index eae1565285e3..86873b6f38a7 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1512,9 +1512,11 @@ static int do_move_pages_to_node(struct mm_struct *mm, /* * Resolves the given address to a struct page, isolates it from the LRU and * puts it to the given pagelist. 
- * Returns -errno if the page cannot be found/isolated or 0 when it has been - * queued or the page doesn't need to be migrated because it is already on - * the target node + * Returns: + * errno - if the page cannot be found/isolated + * 0 - when it doesn't have to be migrated because it is already on the + * target node + * 1 - when it has been queued */ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr, int node, struct list_head *pagelist, bool migrate_all) @@ -1553,7 +1555,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr, if (PageHuge(page)) { if (PageHead(page)) { isolate_huge_page(page, pagelist); - err = 0; + err = 1; } } else { struct page *head; @@ -1563,7 +1565,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr, if (err) goto out_putpage; - err = 0; + err = 1; list_add_tail(&head->lru, pagelist); mod_node_page_state(page_pgdat(head), NR_ISOLATED_ANON + page_is_file_cache(head), @@ -1640,8 +1642,17 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, */ err = add_page_for_migration(mm, addr, current_node, &pagelist, flags & MPOL_MF_MOVE_ALL); - if (!err) + + if (!err) { + /* The page is already on the target node */ + err = store_status(status, i, current_node, 1); + if (err) + goto out_flush; continue; + } else if (err > 0) { + /* The page is successfully queued for migration */ + continue; + } err = store_status(status, i, err, 1); if (err) diff --git a/mm/mmap.c b/mm/mmap.c index 9c648524e4dc..71e4ffc83bcd 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -90,12 +90,6 @@ static void unmap_region(struct mm_struct *mm, * MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes * w: (no) no w: (no) no w: (copy) copy w: (no) no * x: (no) no x: (no) yes x: (no) yes x: (yes) yes - * - * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and - * MAP_PRIVATE: - * r: (no) no - * w: (no) no - * x: (yes) yes */ pgprot_t protection_map[16] __ro_after_init = { __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111, diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 71e3acea7817..d58c481b3df8 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -890,7 +890,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message) K(get_mm_counter(mm, MM_FILEPAGES)), K(get_mm_counter(mm, MM_SHMEMPAGES)), from_kuid(&init_user_ns, task_uid(victim)), - mm_pgtables_bytes(mm), victim->signal->oom_score_adj); + mm_pgtables_bytes(mm) >> 10, victim->signal->oom_score_adj); task_unlock(victim); /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 50055d2e4ea8..2caf780a42e7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -201,11 +201,11 @@ static void wb_min_max_ratio(struct bdi_writeback *wb, if (this_bw < tot_bw) { if (min) { min *= this_bw; - do_div(min, tot_bw); + min = div64_ul(min, tot_bw); } if (max < 100) { max *= this_bw; - do_div(max, tot_bw); + max = div64_ul(max, tot_bw); } } @@ -766,7 +766,7 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc) struct wb_domain *dom = dtc_dom(dtc); unsigned long thresh = dtc->thresh; u64 wb_thresh; - long numerator, denominator; + unsigned long numerator, denominator; unsigned long wb_min_ratio, wb_max_ratio; /* @@ -777,7 +777,7 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc) wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100; wb_thresh *= numerator; - do_div(wb_thresh, denominator); + wb_thresh = div64_ul(wb_thresh, denominator); 
wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio); @@ -1102,7 +1102,7 @@ static void wb_update_write_bandwidth(struct bdi_writeback *wb, bw = written - min(written, wb->written_stamp); bw *= HZ; if (unlikely(elapsed > period)) { - do_div(bw, elapsed); + bw = div64_ul(bw, elapsed); avg = bw; goto out; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4785a8a2040e..d047bf7d8fd4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -694,34 +694,27 @@ void prep_compound_page(struct page *page, unsigned int order) #ifdef CONFIG_DEBUG_PAGEALLOC unsigned int _debug_guardpage_minorder; -#ifdef CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT -DEFINE_STATIC_KEY_TRUE(_debug_pagealloc_enabled); -#else +bool _debug_pagealloc_enabled_early __read_mostly + = IS_ENABLED(CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT); +EXPORT_SYMBOL(_debug_pagealloc_enabled_early); DEFINE_STATIC_KEY_FALSE(_debug_pagealloc_enabled); -#endif EXPORT_SYMBOL(_debug_pagealloc_enabled); DEFINE_STATIC_KEY_FALSE(_debug_guardpage_enabled); static int __init early_debug_pagealloc(char *buf) { - bool enable = false; - - if (kstrtobool(buf, &enable)) - return -EINVAL; - - if (enable) - static_branch_enable(&_debug_pagealloc_enabled); - - return 0; + return kstrtobool(buf, &_debug_pagealloc_enabled_early); } early_param("debug_pagealloc", early_debug_pagealloc); -static void init_debug_guardpage(void) +void init_debug_pagealloc(void) { if (!debug_pagealloc_enabled()) return; + static_branch_enable(&_debug_pagealloc_enabled); + if (!debug_guardpage_minorder()) return; @@ -1186,7 +1179,7 @@ static __always_inline bool free_pages_prepare(struct page *page, */ arch_free_page(page, order); - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) kernel_map_pages(page, 1 << order, 0); kasan_free_nondeferred_pages(page, order); @@ -1207,7 +1200,7 @@ static bool free_pcp_prepare(struct page *page) static bool bulkfree_pcp_prepare(struct page *page) { - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) return free_pages_check(page); else return false; @@ -1221,7 +1214,7 @@ static bool bulkfree_pcp_prepare(struct page *page) */ static bool free_pcp_prepare(struct page *page) { - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) return free_pages_prepare(page, 0, true); else return free_pages_prepare(page, 0, false); @@ -1973,10 +1966,6 @@ void __init page_alloc_init_late(void) for_each_populated_zone(zone) set_zone_contiguous(zone); - -#ifdef CONFIG_DEBUG_PAGEALLOC - init_debug_guardpage(); -#endif } #ifdef CONFIG_CMA @@ -2106,7 +2095,7 @@ static inline bool free_pages_prezeroed(void) */ static inline bool check_pcp_refill(struct page *page) { - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) return check_new_page(page); else return false; @@ -2128,7 +2117,7 @@ static inline bool check_pcp_refill(struct page *page) } static inline bool check_new_pcp(struct page *page) { - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) return check_new_page(page); else return false; @@ -2155,7 +2144,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order, set_page_refcounted(page); arch_alloc_page(page, order); - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) kernel_map_pages(page, 1 << order, 1); kasan_alloc_pages(page, order); kernel_poison_pages(page, 1 << order, 1); @@ -4476,8 +4465,11 @@ retry_cpuset: if (page) goto got_pg; - if (order >= pageblock_order && (gfp_mask & __GFP_IO) && - !(gfp_mask & __GFP_RETRY_MAYFAIL)) { + /* 
+ * Checks for costly allocations with __GFP_NORETRY, which + * includes some THP page fault allocations + */ + if (costly_order && (gfp_mask & __GFP_NORETRY)) { /* * If allocating entire pageblock(s) and compaction * failed because all zones are below low watermarks @@ -4498,23 +4490,6 @@ retry_cpuset: if (compact_result == COMPACT_SKIPPED || compact_result == COMPACT_DEFERRED) goto nopage; - } - - /* - * Checks for costly allocations with __GFP_NORETRY, which - * includes THP page fault allocations - */ - if (costly_order && (gfp_mask & __GFP_NORETRY)) { - /* - * If compaction is deferred for high-order allocations, - * it is because sync compaction recently failed. If - * this is the case and the caller requested a THP - * allocation, we do not want to heavily disrupt the - * system, so we fail the allocation instead of entering - * direct reclaim. - */ - if (compact_result == COMPACT_DEFERRED) - goto nopage; /* * Looks like reclaim/compaction is worth trying, but diff --git a/mm/shmem.c b/mm/shmem.c index 165fa6332993..8793e8cc1a48 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2107,9 +2107,10 @@ unsigned long shmem_get_unmapped_area(struct file *file, /* * Our priority is to support MAP_SHARED mapped hugely; * and support MAP_PRIVATE mapped hugely too, until it is COWed. - * But if caller specified an address hint, respect that as before. + * But if caller specified an address hint and we allocated area there + * successfully, respect that as before. */ - if (uaddr) + if (uaddr == addr) return addr; if (shmem_huge != SHMEM_HUGE_FORCE) { @@ -2143,7 +2144,7 @@ unsigned long shmem_get_unmapped_area(struct file *file, if (inflated_len < len) return addr; - inflated_addr = get_area(NULL, 0, inflated_len, 0, flags); + inflated_addr = get_area(NULL, uaddr, inflated_len, 0, flags); if (IS_ERR_VALUE(inflated_addr)) return addr; if (inflated_addr & ~PAGE_MASK) diff --git a/mm/slab.c b/mm/slab.c index f1e1840af533..a89633603b2d 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1416,7 +1416,7 @@ static void kmem_rcu_free(struct rcu_head *head) #if DEBUG static bool is_debug_pagealloc_cache(struct kmem_cache *cachep) { - if (debug_pagealloc_enabled() && OFF_SLAB(cachep) && + if (debug_pagealloc_enabled_static() && OFF_SLAB(cachep) && (cachep->size % PAGE_SIZE) == 0) return true; @@ -2008,7 +2008,7 @@ int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags) * to check size >= 256. It guarantees that all necessary small * sized slab is initialized in current slab initialization sequence. */ - if (debug_pagealloc_enabled() && (flags & SLAB_POISON) && + if (debug_pagealloc_enabled_static() && (flags & SLAB_POISON) && size >= 256 && cachep->object_size > cache_line_size()) { if (size < PAGE_SIZE || size % PAGE_SIZE == 0) { size_t tmp_size = ALIGN(size, PAGE_SIZE); diff --git a/mm/slab_common.c b/mm/slab_common.c index f0ab6d4ceb4c..0d95ddea13b0 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -903,7 +903,8 @@ static void flush_memcg_workqueue(struct kmem_cache *s) * deactivates the memcg kmem_caches through workqueue. Make sure all * previous workitems on workqueue are processed. 
*/ - flush_workqueue(memcg_kmem_cache_wq); + if (likely(memcg_kmem_cache_wq)) + flush_workqueue(memcg_kmem_cache_wq); /* * If we're racing with children kmem_cache deactivation, it might diff --git a/mm/slub.c b/mm/slub.c index d11389710b12..8eafccf75940 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -288,7 +288,7 @@ static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) unsigned long freepointer_addr; void *p; - if (!debug_pagealloc_enabled()) + if (!debug_pagealloc_enabled_static()) return get_freepointer(s, object); freepointer_addr = (unsigned long)object + s->offset; diff --git a/mm/sparse.c b/mm/sparse.c index b20ab7cdac86..3822ecbd8a1f 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -777,7 +777,14 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION)) { unsigned long section_nr = pfn_to_section_nr(pfn); - if (!section_is_early) { + /* + * When removing an early section, the usage map is kept (as the + * usage maps of other sections fall into the same page). It + * will be re-used when re-adding the section - which is then no + * longer an early section. If the usage map is PageReserved, it + * was allocated during boot. + */ + if (!PageReserved(virt_to_page(ms->usage))) { kfree(ms->usage); ms->usage = NULL; } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 4d3b3d60d893..b29ad17edcf5 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1062,6 +1062,26 @@ __alloc_vmap_area(unsigned long size, unsigned long align, } /* + * Free a region of KVA allocated by alloc_vmap_area + */ +static void free_vmap_area(struct vmap_area *va) +{ + /* + * Remove from the busy tree/list. + */ + spin_lock(&vmap_area_lock); + unlink_va(va, &vmap_area_root); + spin_unlock(&vmap_area_lock); + + /* + * Insert/Merge it back to the free tree/list. + */ + spin_lock(&free_vmap_area_lock); + merge_or_add_vmap_area(va, &free_vmap_area_root, &free_vmap_area_list); + spin_unlock(&free_vmap_area_lock); +} + +/* * Allocate a region of KVA of the specified size and alignment, within the * vstart and vend. */ @@ -1073,6 +1093,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, struct vmap_area *va, *pva; unsigned long addr; int purged = 0; + int ret; BUG_ON(!size); BUG_ON(offset_in_page(size)); @@ -1139,6 +1160,7 @@ retry: va->va_end = addr + size; va->vm = NULL; + spin_lock(&vmap_area_lock); insert_vmap_area(va, &vmap_area_root, &vmap_area_list); spin_unlock(&vmap_area_lock); @@ -1147,6 +1169,12 @@ retry: BUG_ON(va->va_start < vstart); BUG_ON(va->va_end > vend); + ret = kasan_populate_vmalloc(addr, size); + if (ret) { + free_vmap_area(va); + return ERR_PTR(ret); + } + return va; overflow: @@ -1186,26 +1214,6 @@ int unregister_vmap_purge_notifier(struct notifier_block *nb) EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier); /* - * Free a region of KVA allocated by alloc_vmap_area - */ -static void free_vmap_area(struct vmap_area *va) -{ - /* - * Remove from the busy tree/list. - */ - spin_lock(&vmap_area_lock); - unlink_va(va, &vmap_area_root); - spin_unlock(&vmap_area_lock); - - /* - * Insert/Merge it back to the free tree/list. 
- */ - spin_lock(&free_vmap_area_lock); - merge_or_add_vmap_area(va, &free_vmap_area_root, &free_vmap_area_list); - spin_unlock(&free_vmap_area_lock); -} - -/* * Clear the pagetable entries of a given vmap_area */ static void unmap_vmap_area(struct vmap_area *va) @@ -1375,7 +1383,7 @@ static void free_unmap_vmap_area(struct vmap_area *va) { flush_cache_vunmap(va->va_start, va->va_end); unmap_vmap_area(va); - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) flush_tlb_kernel_range(va->va_start, va->va_end); free_vmap_area_noflush(va); @@ -1673,7 +1681,7 @@ static void vb_free(const void *addr, unsigned long size) vunmap_page_range((unsigned long)addr, (unsigned long)addr + size); - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) flush_tlb_kernel_range((unsigned long)addr, (unsigned long)addr + size); @@ -1771,6 +1779,8 @@ void vm_unmap_ram(const void *mem, unsigned int count) BUG_ON(addr > VMALLOC_END); BUG_ON(!PAGE_ALIGNED(addr)); + kasan_poison_vmalloc(mem, size); + if (likely(count <= VMAP_MAX_ALLOC)) { debug_check_no_locks_freed(mem, size); vb_free(mem, size); @@ -1821,6 +1831,9 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro addr = va->va_start; mem = (void *)addr; } + + kasan_unpoison_vmalloc(mem, size); + if (vmap_page_range(addr, addr + size, prot, pages) < 0) { vm_unmap_ram(mem, count); return NULL; @@ -2075,6 +2088,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, { struct vmap_area *va; struct vm_struct *area; + unsigned long requested_size = size; BUG_ON(in_interrupt()); size = PAGE_ALIGN(size); @@ -2098,23 +2112,9 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, return NULL; } - setup_vmalloc_vm(area, va, flags, caller); + kasan_unpoison_vmalloc((void *)va->va_start, requested_size); - /* - * For KASAN, if we are in vmalloc space, we need to cover the shadow - * area with real memory. If we come here through VM_ALLOC, this is - * done by a higher level function that has access to the true size, - * which might not be a full page. - * - * We assume module space comes via VM_ALLOC path. - */ - if (is_vmalloc_addr(area->addr) && !(area->flags & VM_ALLOC)) { - if (kasan_populate_vmalloc(area->size, area)) { - unmap_vmap_area(va); - kfree(area); - return NULL; - } - } + setup_vmalloc_vm(area, va, flags, caller); return area; } @@ -2293,8 +2293,7 @@ static void __vunmap(const void *addr, int deallocate_pages) debug_check_no_locks_freed(area->addr, get_vm_area_size(area)); debug_check_no_obj_freed(area->addr, get_vm_area_size(area)); - if (area->flags & VM_KASAN) - kasan_poison_vmalloc(area->addr, area->size); + kasan_poison_vmalloc(area->addr, area->size); vm_remove_mappings(area, deallocate_pages); @@ -2539,7 +2538,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, if (!size || (size >> PAGE_SHIFT) > totalram_pages()) goto fail; - area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED | + area = __get_vm_area_node(real_size, align, VM_ALLOC | VM_UNINITIALIZED | vm_flags, start, end, node, gfp_mask, caller); if (!area) goto fail; @@ -2548,11 +2547,6 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, if (!addr) return NULL; - if (is_vmalloc_or_module_addr(area->addr)) { - if (kasan_populate_vmalloc(real_size, area)) - return NULL; - } - /* * In this function, newly allocated vm_struct has VM_UNINITIALIZED * flag. It means that vm_struct is not fully initialized. 
@@ -3294,7 +3288,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, struct vmap_area **vas, *va; struct vm_struct **vms; int area, area2, last_area, term_area; - unsigned long base, start, size, end, last_end; + unsigned long base, start, size, end, last_end, orig_start, orig_end; bool purged = false; enum fit_type type; @@ -3424,6 +3418,15 @@ retry: spin_unlock(&free_vmap_area_lock); + /* populate the kasan shadow space */ + for (area = 0; area < nr_vms; area++) { + if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area])) + goto err_free_shadow; + + kasan_unpoison_vmalloc((void *)vas[area]->va_start, + sizes[area]); + } + /* insert all vm's */ spin_lock(&vmap_area_lock); for (area = 0; area < nr_vms; area++) { @@ -3434,12 +3437,6 @@ retry: } spin_unlock(&vmap_area_lock); - /* populate the shadow space outside of the lock */ - for (area = 0; area < nr_vms; area++) { - /* assume success here */ - kasan_populate_vmalloc(sizes[area], vms[area]); - } - kfree(vas); return vms; @@ -3451,8 +3448,12 @@ recovery: * and when pcpu_get_vm_areas() is success. */ while (area--) { - merge_or_add_vmap_area(vas[area], &free_vmap_area_root, - &free_vmap_area_list); + orig_start = vas[area]->va_start; + orig_end = vas[area]->va_end; + va = merge_or_add_vmap_area(vas[area], &free_vmap_area_root, + &free_vmap_area_list); + kasan_release_vmalloc(orig_start, orig_end, + va->va_start, va->va_end); vas[area] = NULL; } @@ -3487,6 +3488,28 @@ err_free2: kfree(vas); kfree(vms); return NULL; + +err_free_shadow: + spin_lock(&free_vmap_area_lock); + /* + * We release all the vmalloc shadows, even the ones for regions that + * hadn't been successfully added. This relies on kasan_release_vmalloc + * being able to tolerate this case. + */ + for (area = 0; area < nr_vms; area++) { + orig_start = vas[area]->va_start; + orig_end = vas[area]->va_end; + va = merge_or_add_vmap_area(vas[area], &free_vmap_area_root, + &free_vmap_area_list); + kasan_release_vmalloc(orig_start, orig_end, + va->va_start, va->va_end); + vas[area] = NULL; + kfree(vms[area]); + } + spin_unlock(&free_vmap_area_lock); + kfree(vas); + kfree(vms); + return NULL; } /** diff --git a/mm/vmscan.c b/mm/vmscan.c index 74e8edce83ca..572fb17c6273 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -387,7 +387,7 @@ void register_shrinker_prepared(struct shrinker *shrinker) { down_write(&shrinker_rwsem); list_add_tail(&shrinker->list, &shrinker_list); -#ifdef CONFIG_MEMCG_KMEM +#ifdef CONFIG_MEMCG if (shrinker->flags & SHRINKER_MEMCG_AWARE) idr_replace(&shrinker_idr, shrinker, shrinker->id); #endif diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 2b2b9aae8a3c..22d17ecfe7df 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -2069,6 +2069,11 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage, zs_pool_dec_isolated(pool); } + if (page_zone(newpage) != page_zone(page)) { + dec_zone_page_state(page, NR_ZSPAGES); + inc_zone_page_state(newpage, NR_ZSPAGES); + } + reset_page(page); put_page(page); page = newpage; |