author     Linus Torvalds <torvalds@linux-foundation.org>  2019-05-19 12:15:32 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2019-05-19 12:15:32 -0700
commit     cb6f8739fbf98203d0fb0bc2c2dbbec0ddfe978a (patch)
tree       bdb69f5b9fb5a3e7b6810319468706bad2b6e1ac
parent     ff8583d6e4e33fe3856a609095c683d5dbe39120 (diff)
parent     de6da1e8bcf0dd2058b950b127491821207679dc (diff)
download   linux-cb6f8739fbf98203d0fb0bc2c2dbbec0ddfe978a.tar.gz
Merge branch 'akpm' (patches from Andrew)
Merge yet more updates from Andrew Morton:
 "A few final bits:

   - large changes to vmalloc, yielding large performance benefits

   - tweak the console-flush-on-panic code

   - a few fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  panic: add an option to replay all the printk message in buffer
  initramfs: don't free a non-existent initrd
  fs/writeback.c: use rcu_barrier() to wait for inflight wb switches going into workqueue when umount
  mm/compaction.c: correct zone boundary handling when isolating pages from a pageblock
  mm/vmap: add DEBUG_AUGMENT_LOWEST_MATCH_CHECK macro
  mm/vmap: add DEBUG_AUGMENT_PROPAGATE_CHECK macro
  mm/vmalloc.c: keep track of free blocks for vmap allocation
-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt |    1
-rw-r--r--  arch/powerpc/kernel/traps.c                     |    2
-rw-r--r--  fs/fs-writeback.c                               |   11
-rw-r--r--  include/linux/console.h                         |    7
-rw-r--r--  include/linux/vmalloc.h                         |    6
-rw-r--r--  init/initramfs.c                                |    2
-rw-r--r--  kernel/panic.c                                  |    6
-rw-r--r--  kernel/printk/printk.c                          |   12
-rw-r--r--  mm/compaction.c                                 |    4
-rw-r--r--  mm/vmalloc.c                                    | 1095
10 files changed, 889 insertions(+), 257 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 52e6fbb042cc..138f6664b2e2 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3212,6 +3212,7 @@
bit 2: print timer info
bit 3: print locks info if CONFIG_LOCKDEP is on
bit 4: print ftrace buffer
+ bit 5: print all printk messages in buffer
panic_on_warn panic() instead of WARN(). Useful to cause kdump
on a WARN().
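As an aside (not part of the patch): a minimal standalone sketch of how the panic_print bitmask decodes, including the new bit 5. The flag values mirror the PANIC_PRINT_* definitions in kernel/panic.c later in this diff; the bit-0/bit-1 values are reproduced from the kernel source.

#include <stdio.h>

#define PANIC_PRINT_TASK_INFO		0x00000001
#define PANIC_PRINT_MEM_INFO		0x00000002
#define PANIC_PRINT_TIMER_INFO		0x00000004
#define PANIC_PRINT_LOCK_INFO		0x00000008
#define PANIC_PRINT_FTRACE_INFO		0x00000010
#define PANIC_PRINT_ALL_PRINTK_MSG	0x00000020	/* new bit 5 */

int main(void)
{
	/* e.g. booting with panic_print=0x21 sets bits 0 and 5 */
	unsigned long panic_print = 0x21;

	if (panic_print & PANIC_PRINT_ALL_PRINTK_MSG)
		printf("would replay the whole printk buffer\n");
	if (panic_print & PANIC_PRINT_TASK_INFO)
		printf("would dump task state\n");
	return 0;
}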
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 665f294725cb..83e59fdaa62d 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -179,7 +179,7 @@ extern void panic_flush_kmsg_end(void)
kmsg_dump(KMSG_DUMP_PANIC);
bust_spinlocks(0);
debug_locks_off();
- console_flush_on_panic();
+ console_flush_on_panic(CONSOLE_FLUSH_PENDING);
}
static unsigned long oops_begin(struct pt_regs *regs)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 36855c1f8daf..b16645b417d9 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -523,8 +523,6 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
isw->inode = inode;
- atomic_inc(&isw_nr_in_flight);
-
/*
* In addition to synchronizing among switchers, I_WB_SWITCH tells
* the RCU protected stat update paths to grab the i_page
@@ -532,6 +530,9 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
* Let's continue after I_WB_SWITCH is guaranteed to be visible.
*/
call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
+
+ atomic_inc(&isw_nr_in_flight);
+
goto out_unlock;
out_free:
@@ -901,7 +902,11 @@ restart:
void cgroup_writeback_umount(void)
{
if (atomic_read(&isw_nr_in_flight)) {
- synchronize_rcu();
+ /*
+ * Use rcu_barrier() to wait for all pending callbacks to
+ * ensure that all in-flight wb switches are in the workqueue.
+ */
+ rcu_barrier();
flush_workqueue(isw_wq);
}
}
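Why rcu_barrier() rather than synchronize_rcu(): synchronize_rcu() only waits for a grace period to elapse, while rcu_barrier() also waits for all previously queued RCU callbacks to finish running, and it is those callbacks that put the wb switches onto isw_wq. A rough userspace analogue of the ordering requirement (an illustrative sketch using pthreads, not the RCU API):

#include <pthread.h>
#include <stdio.h>

#define NCB 4

static pthread_t cb[NCB];
static int queue[NCB];			/* stands in for isw_wq */
static int queued;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* stands in for inode_switch_wbs_rcu_fn(): the callback queues work */
static void *callback(void *arg)
{
	pthread_mutex_lock(&lock);
	queue[queued++] = (int)(long)arg;
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	int i;

	for (i = 0; i < NCB; i++)	/* call_rcu() analogue */
		pthread_create(&cb[i], NULL, callback, (void *)(long)i);

	/*
	 * rcu_barrier() analogue: wait until every callback has run, so
	 * every pending switch is visibly on the queue. Draining before
	 * this point (the synchronize_rcu() behaviour) could miss work.
	 */
	for (i = 0; i < NCB; i++)
		pthread_join(cb[i], NULL);

	pthread_mutex_lock(&lock);	/* flush_workqueue() analogue */
	printf("draining %d queued switches\n", queued);
	pthread_mutex_unlock(&lock);
	return 0;
}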
diff --git a/include/linux/console.h b/include/linux/console.h
index ec9bdb3d7bab..d09951d5a94e 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -166,6 +166,11 @@ struct console {
extern int console_set_on_cmdline;
extern struct console *early_console;
+enum con_flush_mode {
+ CONSOLE_FLUSH_PENDING,
+ CONSOLE_REPLAY_ALL,
+};
+
extern int add_preferred_console(char *name, int idx, char *options);
extern void register_console(struct console *);
extern int unregister_console(struct console *);
@@ -175,7 +180,7 @@ extern int console_trylock(void);
extern void console_unlock(void);
extern void console_conditional_schedule(void);
extern void console_unblank(void);
-extern void console_flush_on_panic(void);
+extern void console_flush_on_panic(enum con_flush_mode mode);
extern struct tty_driver *console_device(int *);
extern void console_stop(struct console *);
extern void console_start(struct console *);
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index c6eebb839552..51e131245379 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -50,12 +50,16 @@ struct vm_struct {
struct vmap_area {
unsigned long va_start;
unsigned long va_end;
+
+ /*
+ * Largest available free size in subtree.
+ */
+ unsigned long subtree_max_size;
unsigned long flags;
struct rb_node rb_node; /* address sorted rbtree */
struct list_head list; /* address sorted list */
struct llist_node purge_list; /* "lazy purge" list */
struct vm_struct *vm;
- struct rcu_head rcu_head;
};
/*
diff --git a/init/initramfs.c b/init/initramfs.c
index 435a428c2af1..178130fd61c2 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -669,7 +669,7 @@ done:
* If the initrd region overlaps the crashkernel reserved region,
* free only the memory that is not part of the crashkernel region.
*/
- if (!do_retain_initrd && !kexec_free_initrd())
+ if (!do_retain_initrd && initrd_start && !kexec_free_initrd())
free_initrd_mem(initrd_start, initrd_end);
initrd_start = 0;
initrd_end = 0;
diff --git a/kernel/panic.c b/kernel/panic.c
index 8779d64bace0..b4543a31a495 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -51,6 +51,7 @@ EXPORT_SYMBOL_GPL(panic_timeout);
#define PANIC_PRINT_TIMER_INFO 0x00000004
#define PANIC_PRINT_LOCK_INFO 0x00000008
#define PANIC_PRINT_FTRACE_INFO 0x00000010
+#define PANIC_PRINT_ALL_PRINTK_MSG 0x00000020
unsigned long panic_print;
ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
@@ -134,6 +135,9 @@ EXPORT_SYMBOL(nmi_panic);
static void panic_print_sys_info(void)
{
+ if (panic_print & PANIC_PRINT_ALL_PRINTK_MSG)
+ console_flush_on_panic(CONSOLE_REPLAY_ALL);
+
if (panic_print & PANIC_PRINT_TASK_INFO)
show_state();
@@ -277,7 +281,7 @@ void panic(const char *fmt, ...)
* panic() is not being called from OOPS.
*/
debug_locks_off();
- console_flush_on_panic();
+ console_flush_on_panic(CONSOLE_FLUSH_PENDING);
panic_print_sys_info();
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 17102fd4c136..a6e06fe38e41 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2535,10 +2535,11 @@ void console_unblank(void)
/**
* console_flush_on_panic - flush console content on panic
+ * @mode: flush all messages in buffer or just the pending ones
*
* Immediately output all pending messages no matter what.
*/
-void console_flush_on_panic(void)
+void console_flush_on_panic(enum con_flush_mode mode)
{
/*
* If someone else is holding the console lock, trylock will fail
@@ -2549,6 +2550,15 @@ void console_flush_on_panic(void)
*/
console_trylock();
console_may_schedule = 0;
+
+ if (mode == CONSOLE_REPLAY_ALL) {
+ unsigned long flags;
+
+ logbuf_lock_irqsave(flags);
+ console_seq = log_first_seq;
+ console_idx = log_first_idx;
+ logbuf_unlock_irqrestore(flags);
+ }
console_unlock();
}
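A toy model of what CONSOLE_REPLAY_ALL does above (a simplified flat ring, not the kernel's record format; names are illustrative): rewind the console read cursor to the oldest record so the flush loop re-emits everything still stored.

#include <stdio.h>

#define NREC 8

static const char *ring[NREC] = { "msg0", "msg1", "msg2", "msg3" };
static unsigned long log_first_seq;	/* oldest record still stored */
static unsigned long log_next_seq = 4;	/* one past the newest record */
static unsigned long console_seq = 4;	/* cursor: everything printed */

static void flush(int replay_all)
{
	if (replay_all)
		console_seq = log_first_seq;	/* the rewind in this patch */

	while (console_seq < log_next_seq)
		printf("%s\n", ring[console_seq++ % NREC]);
}

int main(void)
{
	flush(0);	/* CONSOLE_FLUSH_PENDING: nothing pending, no output */
	flush(1);	/* CONSOLE_REPLAY_ALL: re-emits msg0..msg3 */
	return 0;
}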
diff --git a/mm/compaction.c b/mm/compaction.c
index cbac7277978a..9febc8cc84e7 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1230,7 +1230,7 @@ fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long
/* Pageblock boundaries */
start_pfn = pageblock_start_pfn(pfn);
- end_pfn = min(start_pfn + pageblock_nr_pages, zone_end_pfn(cc->zone));
+ end_pfn = min(pageblock_end_pfn(pfn), zone_end_pfn(cc->zone)) - 1;
/* Scan before */
if (start_pfn != pfn) {
@@ -1241,7 +1241,7 @@ fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long
/* Scan after */
start_pfn = pfn + nr_isolated;
- if (start_pfn != end_pfn)
+ if (start_pfn < end_pfn)
isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false);
/* Skip this pageblock in the future as it's full or nearly full */
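The arithmetic behind the fix, as a standalone sketch (assuming 1024-page pageblocks; the helpers are reimplemented here, not the kernel's): after clamping to the zone end, end_pfn is the last valid pfn of the scan range, i.e. an inclusive bound, which is why the "scan after" test becomes a strict '<'.

#include <stdio.h>

#define pageblock_nr_pages	1024UL
#define pageblock_start_pfn(pfn) ((pfn) & ~(pageblock_nr_pages - 1))
#define pageblock_end_pfn(pfn)	 (pageblock_start_pfn(pfn) + pageblock_nr_pages)

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned long zone_end_pfn = 1536;	/* zone ends mid-pageblock */
	unsigned long pfn = 1200;		/* target page in that block */
	unsigned long start_pfn = pageblock_start_pfn(pfn);	/* 1024 */
	unsigned long end_pfn =
		min_ul(pageblock_end_pfn(pfn), zone_end_pfn) - 1;	/* 1535 */

	/* the scan must never touch pfns >= zone_end_pfn */
	printf("scan [%lu, %lu]\n", start_pfn, end_pfn);
	return 0;
}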
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 67bbb8d2a0a8..c42872ed82ac 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -32,6 +32,7 @@
#include <linux/compiler.h>
#include <linux/llist.h>
#include <linux/bitops.h>
+#include <linux/rbtree_augmented.h>
#include <linux/uaccess.h>
#include <asm/tlbflush.h>
@@ -324,6 +325,9 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
/*** Global kva allocator ***/
+#define DEBUG_AUGMENT_PROPAGATE_CHECK 0
+#define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0
+
#define VM_LAZY_FREE 0x02
#define VM_VM_AREA 0x04
@@ -332,14 +336,67 @@ static DEFINE_SPINLOCK(vmap_area_lock);
LIST_HEAD(vmap_area_list);
static LLIST_HEAD(vmap_purge_list);
static struct rb_root vmap_area_root = RB_ROOT;
+static bool vmap_initialized __read_mostly;
+
+/*
+ * This kmem_cache is used for vmap_area objects. Instead of
+ * allocating from slab we reuse an object from this cache to
+ * make things faster, especially in the "no edge"
+ * splitting of a free block.
+ */
+static struct kmem_cache *vmap_area_cachep;
+
+/*
+ * This linked list is used together with free_vmap_area_root.
+ * It gives O(1) access to prev/next to perform fast coalescing.
+ */
+static LIST_HEAD(free_vmap_area_list);
+
+/*
+ * This augmented red-black tree represents the free vmap space.
+ * All vmap_area objects in this tree are sorted by va->va_start
+ * address. It is used for allocation and merging when a vmap
+ * object is released.
+ *
+ * Each vmap_area node stores the maximum free block size
+ * available in its sub-tree, left or right. This makes it
+ * possible to find the lowest match for a free area.
+ */
+static struct rb_root free_vmap_area_root = RB_ROOT;
+
+static __always_inline unsigned long
+va_size(struct vmap_area *va)
+{
+ return (va->va_end - va->va_start);
+}
+
+static __always_inline unsigned long
+get_subtree_max_size(struct rb_node *node)
+{
+ struct vmap_area *va;
+
+ va = rb_entry_safe(node, struct vmap_area, rb_node);
+ return va ? va->subtree_max_size : 0;
+}
+
+/*
+ * Gets called when the node is removed or rotated.
+ */
+static __always_inline unsigned long
+compute_subtree_max_size(struct vmap_area *va)
+{
+ return max3(va_size(va),
+ get_subtree_max_size(va->rb_node.rb_left),
+ get_subtree_max_size(va->rb_node.rb_right));
+}
-/* The vmap cache globals are protected by vmap_area_lock */
-static struct rb_node *free_vmap_cache;
-static unsigned long cached_hole_size;
-static unsigned long cached_vstart;
-static unsigned long cached_align;
+RB_DECLARE_CALLBACKS(static, free_vmap_area_rb_augment_cb,
+ struct vmap_area, rb_node, unsigned long, subtree_max_size,
+ compute_subtree_max_size)
-static unsigned long vmap_area_pcpu_hole;
+static void purge_vmap_area_lazy(void);
+static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
+static unsigned long lazy_max_pages(void);
static struct vmap_area *__find_vmap_area(unsigned long addr)
{
@@ -360,41 +417,610 @@ static struct vmap_area *__find_vmap_area(unsigned long addr)
return NULL;
}
-static void __insert_vmap_area(struct vmap_area *va)
-{
- struct rb_node **p = &vmap_area_root.rb_node;
- struct rb_node *parent = NULL;
- struct rb_node *tmp;
+/*
+ * This function returns the parent node and the address of
+ * its left or right link for further processing.
+ */
+static __always_inline struct rb_node **
+find_va_links(struct vmap_area *va,
+ struct rb_root *root, struct rb_node *from,
+ struct rb_node **parent)
+{
+ struct vmap_area *tmp_va;
+ struct rb_node **link;
+
+ if (root) {
+ link = &root->rb_node;
+ if (unlikely(!*link)) {
+ *parent = NULL;
+ return link;
+ }
+ } else {
+ link = &from;
+ }
- while (*p) {
- struct vmap_area *tmp_va;
+ /*
+ * Go to the bottom of the tree. When we hit the last point
+	 * we end up with the parent rb_node and the correct direction
+	 * ("link") where the new va->rb_node will be attached.
+ */
+ do {
+ tmp_va = rb_entry(*link, struct vmap_area, rb_node);
- parent = *p;
- tmp_va = rb_entry(parent, struct vmap_area, rb_node);
- if (va->va_start < tmp_va->va_end)
- p = &(*p)->rb_left;
- else if (va->va_end > tmp_va->va_start)
- p = &(*p)->rb_right;
+ /*
+		 * During the traversal we also do a sanity check:
+		 * trigger the BUG() if a partial (left/right) or
+		 * full overlap is found.
+ */
+ if (va->va_start < tmp_va->va_end &&
+ va->va_end <= tmp_va->va_start)
+ link = &(*link)->rb_left;
+ else if (va->va_end > tmp_va->va_start &&
+ va->va_start >= tmp_va->va_end)
+ link = &(*link)->rb_right;
else
BUG();
+ } while (*link);
+
+ *parent = &tmp_va->rb_node;
+ return link;
+}
+
+static __always_inline struct list_head *
+get_va_next_sibling(struct rb_node *parent, struct rb_node **link)
+{
+ struct list_head *list;
+
+ if (unlikely(!parent))
+ /*
+ * The red-black tree where we try to find VA neighbors
+ * before merging or inserting is empty, i.e. it means
+ * there is no free vmap space. Normally it does not
+ * happen but we handle this case anyway.
+ */
+ return NULL;
+
+ list = &rb_entry(parent, struct vmap_area, rb_node)->list;
+ return (&parent->rb_right == link ? list->next : list);
+}
+
+static __always_inline void
+link_va(struct vmap_area *va, struct rb_root *root,
+ struct rb_node *parent, struct rb_node **link, struct list_head *head)
+{
+ /*
+ * VA is still not in the list, but we can
+ * identify its future previous list_head node.
+ */
+ if (likely(parent)) {
+ head = &rb_entry(parent, struct vmap_area, rb_node)->list;
+ if (&parent->rb_right != link)
+ head = head->prev;
}
- rb_link_node(&va->rb_node, parent, p);
- rb_insert_color(&va->rb_node, &vmap_area_root);
+ /* Insert to the rb-tree */
+ rb_link_node(&va->rb_node, parent, link);
+ if (root == &free_vmap_area_root) {
+ /*
+		 * Perform a simple insertion into the tree. We do not
+		 * set va->subtree_max_size to its current size before
+		 * calling rb_insert_augmented(), because we populate
+		 * the tree from the bottom up to parent levels once
+		 * the node _is_ in the tree.
+		 *
+		 * Therefore we set subtree_max_size to zero after the
+		 * insertion, to let __augment_tree_propagate_from() put
+		 * everything in the correct order later on.
+ */
+ rb_insert_augmented(&va->rb_node,
+ root, &free_vmap_area_rb_augment_cb);
+ va->subtree_max_size = 0;
+ } else {
+ rb_insert_color(&va->rb_node, root);
+ }
- /* address-sort this list */
- tmp = rb_prev(&va->rb_node);
- if (tmp) {
- struct vmap_area *prev;
- prev = rb_entry(tmp, struct vmap_area, rb_node);
- list_add_rcu(&va->list, &prev->list);
- } else
- list_add_rcu(&va->list, &vmap_area_list);
+ /* Address-sort this list */
+ list_add(&va->list, head);
}
-static void purge_vmap_area_lazy(void);
+static __always_inline void
+unlink_va(struct vmap_area *va, struct rb_root *root)
+{
+ /*
+ * During merging a VA node can be empty, therefore
+	 * linked with neither the tree nor the list. Just check it.
+ */
+ if (!RB_EMPTY_NODE(&va->rb_node)) {
+ if (root == &free_vmap_area_root)
+ rb_erase_augmented(&va->rb_node,
+ root, &free_vmap_area_rb_augment_cb);
+ else
+ rb_erase(&va->rb_node, root);
-static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
+ list_del(&va->list);
+ RB_CLEAR_NODE(&va->rb_node);
+ }
+}
+
+#if DEBUG_AUGMENT_PROPAGATE_CHECK
+static void
+augment_tree_propagate_check(struct rb_node *n)
+{
+ struct vmap_area *va;
+ struct rb_node *node;
+ unsigned long size;
+ bool found = false;
+
+ if (n == NULL)
+ return;
+
+ va = rb_entry(n, struct vmap_area, rb_node);
+ size = va->subtree_max_size;
+ node = n;
+
+ while (node) {
+ va = rb_entry(node, struct vmap_area, rb_node);
+
+ if (get_subtree_max_size(node->rb_left) == size) {
+ node = node->rb_left;
+ } else {
+ if (va_size(va) == size) {
+ found = true;
+ break;
+ }
+
+ node = node->rb_right;
+ }
+ }
+
+ if (!found) {
+ va = rb_entry(n, struct vmap_area, rb_node);
+ pr_emerg("tree is corrupted: %lu, %lu\n",
+ va_size(va), va->subtree_max_size);
+ }
+
+ augment_tree_propagate_check(n->rb_left);
+ augment_tree_propagate_check(n->rb_right);
+}
+#endif
+
+/*
+ * This function populates subtree_max_size from the bottom up
+ * to upper levels, starting from the VA point. The propagation
+ * must be done when the VA size is modified by changing its
+ * va_start/va_end, or when a VA is newly inserted into the tree.
+ *
+ * It means that __augment_tree_propagate_from() must be called:
+ * - After VA has been inserted to the tree (free path);
+ * - After VA has been shrunk (allocation path);
+ * - After VA has been increased (merging path).
+ *
+ * Please note that, it does not mean that upper parent nodes
+ * and their subtree_max_size are recalculated all the time up
+ * to the root node.
+ *
+ * 4--8
+ * /\
+ * / \
+ * / \
+ * 2--2 8--8
+ *
+ * For example, if we modify node 4, shrinking it to 2, then no
+ * modification is required. If we shrink node 2 to 1, only its
+ * subtree_max_size is updated, and set to 1. If we shrink node
+ * 8 to 6, then its subtree_max_size is set to 6 and the parent
+ * node becomes 4--6.
+ */
+static __always_inline void
+augment_tree_propagate_from(struct vmap_area *va)
+{
+ struct rb_node *node = &va->rb_node;
+ unsigned long new_va_sub_max_size;
+
+ while (node) {
+ va = rb_entry(node, struct vmap_area, rb_node);
+ new_va_sub_max_size = compute_subtree_max_size(va);
+
+ /*
+ * If the newly calculated maximum available size of the
+ * subtree is equal to the current one, then it means that
+ * the tree is propagated correctly. So we have to stop at
+ * this point to save cycles.
+ */
+ if (va->subtree_max_size == new_va_sub_max_size)
+ break;
+
+ va->subtree_max_size = new_va_sub_max_size;
+ node = rb_parent(&va->rb_node);
+ }
+
+#if DEBUG_AUGMENT_PROPAGATE_CHECK
+ augment_tree_propagate_check(free_vmap_area_root.rb_node);
+#endif
+}
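A minimal standalone model of the stop-early propagation above (assumptions: a plain binary tree with parent pointers, no rebalancing), reproducing the 4--8 example from the comment before this function:

#include <stdio.h>

struct node {
	unsigned long size;		/* va_size() analogue */
	unsigned long subtree_max;	/* subtree_max_size analogue */
	struct node *left, *right, *parent;
};

static unsigned long sub_max(struct node *n)
{
	return n ? n->subtree_max : 0;
}

static unsigned long compute(struct node *n)
{
	unsigned long m = n->size;

	if (sub_max(n->left) > m)
		m = sub_max(n->left);
	if (sub_max(n->right) > m)
		m = sub_max(n->right);
	return m;
}

static void propagate_from(struct node *n)
{
	while (n) {
		unsigned long m = compute(n);

		if (n->subtree_max == m)
			break;		/* upper levels already correct */
		n->subtree_max = m;
		n = n->parent;
	}
}

int main(void)
{
	/* root 4 with children 2 and 8: the "4--8" tree above */
	struct node l = { 2, 2, NULL, NULL, NULL };
	struct node r = { 8, 8, NULL, NULL, NULL };
	struct node root = { 4, 8, &l, &r, NULL };

	l.parent = r.parent = &root;

	r.size = 6;		/* shrink node 8 to 6 */
	propagate_from(&r);
	printf("root subtree_max = %lu\n", root.subtree_max);	/* 6 */
	return 0;
}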
+
+static void
+insert_vmap_area(struct vmap_area *va,
+ struct rb_root *root, struct list_head *head)
+{
+ struct rb_node **link;
+ struct rb_node *parent;
+
+ link = find_va_links(va, root, NULL, &parent);
+ link_va(va, root, parent, link, head);
+}
+
+static void
+insert_vmap_area_augment(struct vmap_area *va,
+ struct rb_node *from, struct rb_root *root,
+ struct list_head *head)
+{
+ struct rb_node **link;
+ struct rb_node *parent;
+
+ if (from)
+ link = find_va_links(va, NULL, from, &parent);
+ else
+ link = find_va_links(va, root, NULL, &parent);
+
+ link_va(va, root, parent, link, head);
+ augment_tree_propagate_from(va);
+}
+
+/*
+ * Merge a de-allocated chunk of VA memory with the previous
+ * and next free blocks. If no coalescing is done, a new
+ * free area is inserted. If the VA has been merged, it is
+ * freed.
+ */
+static __always_inline void
+merge_or_add_vmap_area(struct vmap_area *va,
+ struct rb_root *root, struct list_head *head)
+{
+ struct vmap_area *sibling;
+ struct list_head *next;
+ struct rb_node **link;
+ struct rb_node *parent;
+ bool merged = false;
+
+ /*
+ * Find a place in the tree where VA potentially will be
+ * inserted, unless it is merged with its sibling/siblings.
+ */
+ link = find_va_links(va, root, NULL, &parent);
+
+ /*
+ * Get next node of VA to check if merging can be done.
+ */
+ next = get_va_next_sibling(parent, link);
+ if (unlikely(next == NULL))
+ goto insert;
+
+ /*
+ * start end
+ * | |
+ * |<------VA------>|<-----Next----->|
+ * | |
+ * start end
+ */
+ if (next != head) {
+ sibling = list_entry(next, struct vmap_area, list);
+ if (sibling->va_start == va->va_end) {
+ sibling->va_start = va->va_start;
+
+ /* Check and update the tree if needed. */
+ augment_tree_propagate_from(sibling);
+
+ /* Remove this VA, it has been merged. */
+ unlink_va(va, root);
+
+ /* Free vmap_area object. */
+ kmem_cache_free(vmap_area_cachep, va);
+
+ /* Point to the new merged area. */
+ va = sibling;
+ merged = true;
+ }
+ }
+
+ /*
+ * start end
+ * | |
+ * |<-----Prev----->|<------VA------>|
+ * | |
+ * start end
+ */
+ if (next->prev != head) {
+ sibling = list_entry(next->prev, struct vmap_area, list);
+ if (sibling->va_end == va->va_start) {
+ sibling->va_end = va->va_end;
+
+ /* Check and update the tree if needed. */
+ augment_tree_propagate_from(sibling);
+
+ /* Remove this VA, it has been merged. */
+ unlink_va(va, root);
+
+ /* Free vmap_area object. */
+ kmem_cache_free(vmap_area_cachep, va);
+
+ return;
+ }
+ }
+
+insert:
+ if (!merged) {
+ link_va(va, root, parent, link, head);
+ augment_tree_propagate_from(va);
+ }
+}
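The coalescing rule above, reduced to plain ranges (an illustrative sketch; the real code also handles the case where a block merged with its next neighbour can subsequently extend the previous one):

#include <stdio.h>

struct range { unsigned long start, end; };

static void merge_or_add(struct range *prev, struct range *next,
			 struct range *va)
{
	if (next && next->start == va->end) {		/* merge with next */
		next->start = va->start;
		va->start = va->end = 0;		/* va object "freed" */
	} else if (prev && prev->end == va->start) {	/* merge with prev */
		prev->end = va->end;
		va->start = va->end = 0;
	}
	/* otherwise va would be inserted as a new free block */
}

int main(void)
{
	struct range prev = { 0, 100 }, next = { 200, 300 };
	struct range va = { 100, 200 };		/* touches both neighbours */

	merge_or_add(&prev, &next, &va);
	printf("next now [%lu, %lu)\n", next.start, next.end); /* [100,300) */
	return 0;
}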
+
+static __always_inline bool
+is_within_this_va(struct vmap_area *va, unsigned long size,
+ unsigned long align, unsigned long vstart)
+{
+ unsigned long nva_start_addr;
+
+ if (va->va_start > vstart)
+ nva_start_addr = ALIGN(va->va_start, align);
+ else
+ nva_start_addr = ALIGN(vstart, align);
+
+ /* Can be overflowed due to big size or alignment. */
+ if (nva_start_addr + size < nva_start_addr ||
+ nva_start_addr < vstart)
+ return false;
+
+ return (nva_start_addr + size <= va->va_end);
+}
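A short illustration of the wrap-around test used above (relying on the modular arithmetic of C unsigned types):

#include <stdio.h>

int main(void)
{
	unsigned long nva = ~0UL - 10;	/* near the top of the address space */
	unsigned long size = 100;

	if (nva + size < nva)		/* sum wrapped past zero */
		printf("request overflows: reject\n");
	return 0;
}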
+
+/*
+ * Find the first free block (lowest start address) in the tree
+ * that satisfies the request described by the passed
+ * parameters.
+ */
+static __always_inline struct vmap_area *
+find_vmap_lowest_match(unsigned long size,
+ unsigned long align, unsigned long vstart)
+{
+ struct vmap_area *va;
+ struct rb_node *node;
+ unsigned long length;
+
+ /* Start from the root. */
+ node = free_vmap_area_root.rb_node;
+
+ /* Adjust the search size for alignment overhead. */
+ length = size + align - 1;
+
+ while (node) {
+ va = rb_entry(node, struct vmap_area, rb_node);
+
+ if (get_subtree_max_size(node->rb_left) >= length &&
+ vstart < va->va_start) {
+ node = node->rb_left;
+ } else {
+ if (is_within_this_va(va, size, align, vstart))
+ return va;
+
+ /*
+			 * It does not make sense to go deeper into the right
+			 * sub-tree if it does not have a free block that is
+			 * at least as big as the requested search length.
+ */
+ if (get_subtree_max_size(node->rb_right) >= length) {
+ node = node->rb_right;
+ continue;
+ }
+
+ /*
+			 * OK. We roll back and find the first right sub-tree
+			 * that satisfies the search criteria. It can happen
+			 * only once due to the "vstart" restriction.
+ */
+ while ((node = rb_parent(node))) {
+ va = rb_entry(node, struct vmap_area, rb_node);
+ if (is_within_this_va(va, size, align, vstart))
+ return va;
+
+ if (get_subtree_max_size(node->rb_right) >= length &&
+ vstart <= va->va_start) {
+ node = node->rb_right;
+ break;
+ }
+ }
+ }
+ }
+
+ return NULL;
+}
+
+#if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
+#include <linux/random.h>
+
+static struct vmap_area *
+find_vmap_lowest_linear_match(unsigned long size,
+ unsigned long align, unsigned long vstart)
+{
+ struct vmap_area *va;
+
+ list_for_each_entry(va, &free_vmap_area_list, list) {
+ if (!is_within_this_va(va, size, align, vstart))
+ continue;
+
+ return va;
+ }
+
+ return NULL;
+}
+
+static void
+find_vmap_lowest_match_check(unsigned long size)
+{
+ struct vmap_area *va_1, *va_2;
+ unsigned long vstart;
+ unsigned int rnd;
+
+ get_random_bytes(&rnd, sizeof(rnd));
+ vstart = VMALLOC_START + rnd;
+
+ va_1 = find_vmap_lowest_match(size, 1, vstart);
+ va_2 = find_vmap_lowest_linear_match(size, 1, vstart);
+
+ if (va_1 != va_2)
+ pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n",
+ va_1, va_2, vstart);
+}
+#endif
+
+enum fit_type {
+ NOTHING_FIT = 0,
+ FL_FIT_TYPE = 1, /* full fit */
+ LE_FIT_TYPE = 2, /* left edge fit */
+ RE_FIT_TYPE = 3, /* right edge fit */
+ NE_FIT_TYPE = 4 /* no edge fit */
+};
+
+static __always_inline enum fit_type
+classify_va_fit_type(struct vmap_area *va,
+ unsigned long nva_start_addr, unsigned long size)
+{
+ enum fit_type type;
+
+ /* Check if it is within VA. */
+ if (nva_start_addr < va->va_start ||
+ nva_start_addr + size > va->va_end)
+ return NOTHING_FIT;
+
+ /* Now classify. */
+ if (va->va_start == nva_start_addr) {
+ if (va->va_end == nva_start_addr + size)
+ type = FL_FIT_TYPE;
+ else
+ type = LE_FIT_TYPE;
+ } else if (va->va_end == nva_start_addr + size) {
+ type = RE_FIT_TYPE;
+ } else {
+ type = NE_FIT_TYPE;
+ }
+
+ return type;
+}
+
+static __always_inline int
+adjust_va_to_fit_type(struct vmap_area *va,
+ unsigned long nva_start_addr, unsigned long size,
+ enum fit_type type)
+{
+ struct vmap_area *lva;
+
+ if (type == FL_FIT_TYPE) {
+ /*
+ * No need to split VA, it fully fits.
+ *
+ * | |
+ * V NVA V
+ * |---------------|
+ */
+ unlink_va(va, &free_vmap_area_root);
+ kmem_cache_free(vmap_area_cachep, va);
+ } else if (type == LE_FIT_TYPE) {
+ /*
+ * Split left edge of fit VA.
+ *
+ * | |
+ * V NVA V R
+ * |-------|-------|
+ */
+ va->va_start += size;
+ } else if (type == RE_FIT_TYPE) {
+ /*
+ * Split right edge of fit VA.
+ *
+ * | |
+ * L V NVA V
+ * |-------|-------|
+ */
+ va->va_end = nva_start_addr;
+ } else if (type == NE_FIT_TYPE) {
+ /*
+ * Split no edge of fit VA.
+ *
+ * | |
+ * L V NVA V R
+ * |---|-------|---|
+ */
+ lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
+ if (unlikely(!lva))
+ return -1;
+
+ /*
+ * Build the remainder.
+ */
+ lva->va_start = va->va_start;
+ lva->va_end = nva_start_addr;
+
+ /*
+ * Shrink this VA to remaining size.
+ */
+ va->va_start = nva_start_addr + size;
+ } else {
+ return -1;
+ }
+
+ if (type != FL_FIT_TYPE) {
+ augment_tree_propagate_from(va);
+
+ if (type == NE_FIT_TYPE)
+ insert_vmap_area_augment(lva, &va->rb_node,
+ &free_vmap_area_root, &free_vmap_area_list);
+ }
+
+ return 0;
+}
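The four fit types, exercised standalone (a sketch mirroring classify_va_fit_type() on plain integers; the printed values are the enum constants 1..4):

#include <stdio.h>

enum fit_type { NOTHING_FIT, FL_FIT_TYPE, LE_FIT_TYPE, RE_FIT_TYPE, NE_FIT_TYPE };

static enum fit_type classify(unsigned long va_start, unsigned long va_end,
			      unsigned long nva, unsigned long size)
{
	/* request must lie inside the free block */
	if (nva < va_start || nva + size > va_end)
		return NOTHING_FIT;
	if (va_start == nva)
		return (va_end == nva + size) ? FL_FIT_TYPE : LE_FIT_TYPE;
	if (va_end == nva + size)
		return RE_FIT_TYPE;
	return NE_FIT_TYPE;
}

int main(void)
{
	/* free block [100, 200) */
	printf("%d\n", classify(100, 200, 100, 100));	/* FL: full fit */
	printf("%d\n", classify(100, 200, 100, 40));	/* LE: left edge */
	printf("%d\n", classify(100, 200, 160, 40));	/* RE: right edge */
	printf("%d\n", classify(100, 200, 130, 40));	/* NE: no edge */
	return 0;
}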
+
+/*
+ * Returns the start address of the newly allocated area on success.
+ * Otherwise "vend" is returned, indicating failure.
+ */
+static __always_inline unsigned long
+__alloc_vmap_area(unsigned long size, unsigned long align,
+ unsigned long vstart, unsigned long vend, int node)
+{
+ unsigned long nva_start_addr;
+ struct vmap_area *va;
+ enum fit_type type;
+ int ret;
+
+ va = find_vmap_lowest_match(size, align, vstart);
+ if (unlikely(!va))
+ return vend;
+
+ if (va->va_start > vstart)
+ nva_start_addr = ALIGN(va->va_start, align);
+ else
+ nva_start_addr = ALIGN(vstart, align);
+
+ /* Check the "vend" restriction. */
+ if (nva_start_addr + size > vend)
+ return vend;
+
+ /* Classify what we have found. */
+ type = classify_va_fit_type(va, nva_start_addr, size);
+ if (WARN_ON_ONCE(type == NOTHING_FIT))
+ return vend;
+
+ /* Update the free vmap_area. */
+ ret = adjust_va_to_fit_type(va, nva_start_addr, size, type);
+ if (ret)
+ return vend;
+
+#if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
+ find_vmap_lowest_match_check(size);
+#endif
+
+ return nva_start_addr;
+}
/*
* Allocate a region of KVA of the specified size and alignment, within the
@@ -406,18 +1032,19 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
int node, gfp_t gfp_mask)
{
struct vmap_area *va;
- struct rb_node *n;
unsigned long addr;
int purged = 0;
- struct vmap_area *first;
BUG_ON(!size);
BUG_ON(offset_in_page(size));
BUG_ON(!is_power_of_2(align));
+ if (unlikely(!vmap_initialized))
+ return ERR_PTR(-EBUSY);
+
might_sleep();
- va = kmalloc_node(sizeof(struct vmap_area),
+ va = kmem_cache_alloc_node(vmap_area_cachep,
gfp_mask & GFP_RECLAIM_MASK, node);
if (unlikely(!va))
return ERR_PTR(-ENOMEM);
@@ -430,87 +1057,20 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
retry:
spin_lock(&vmap_area_lock);
- /*
- * Invalidate cache if we have more permissive parameters.
- * cached_hole_size notes the largest hole noticed _below_
- * the vmap_area cached in free_vmap_cache: if size fits
- * into that hole, we want to scan from vstart to reuse
- * the hole instead of allocating above free_vmap_cache.
- * Note that __free_vmap_area may update free_vmap_cache
- * without updating cached_hole_size or cached_align.
- */
- if (!free_vmap_cache ||
- size < cached_hole_size ||
- vstart < cached_vstart ||
- align < cached_align) {
-nocache:
- cached_hole_size = 0;
- free_vmap_cache = NULL;
- }
- /* record if we encounter less permissive parameters */
- cached_vstart = vstart;
- cached_align = align;
-
- /* find starting point for our search */
- if (free_vmap_cache) {
- first = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
- addr = ALIGN(first->va_end, align);
- if (addr < vstart)
- goto nocache;
- if (addr + size < addr)
- goto overflow;
-
- } else {
- addr = ALIGN(vstart, align);
- if (addr + size < addr)
- goto overflow;
-
- n = vmap_area_root.rb_node;
- first = NULL;
-
- while (n) {
- struct vmap_area *tmp;
- tmp = rb_entry(n, struct vmap_area, rb_node);
- if (tmp->va_end >= addr) {
- first = tmp;
- if (tmp->va_start <= addr)
- break;
- n = n->rb_left;
- } else
- n = n->rb_right;
- }
-
- if (!first)
- goto found;
- }
-
- /* from the starting point, walk areas until a suitable hole is found */
- while (addr + size > first->va_start && addr + size <= vend) {
- if (addr + cached_hole_size < first->va_start)
- cached_hole_size = first->va_start - addr;
- addr = ALIGN(first->va_end, align);
- if (addr + size < addr)
- goto overflow;
-
- if (list_is_last(&first->list, &vmap_area_list))
- goto found;
- first = list_next_entry(first, list);
- }
-
-found:
/*
- * Check also calculated address against the vstart,
- * because it can be 0 because of big align request.
+ * If an allocation fails, the "vend" address is
+ * returned. Therefore trigger the overflow path.
*/
- if (addr + size > vend || addr < vstart)
+ addr = __alloc_vmap_area(size, align, vstart, vend, node);
+ if (unlikely(addr == vend))
goto overflow;
va->va_start = addr;
va->va_end = addr + size;
va->flags = 0;
- __insert_vmap_area(va);
- free_vmap_cache = &va->rb_node;
+ insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
+
spin_unlock(&vmap_area_lock);
BUG_ON(!IS_ALIGNED(va->va_start, align));
@@ -539,7 +1099,8 @@ overflow:
if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
size);
- kfree(va);
+
+ kmem_cache_free(vmap_area_cachep, va);
return ERR_PTR(-EBUSY);
}
@@ -559,35 +1120,16 @@ static void __free_vmap_area(struct vmap_area *va)
{
BUG_ON(RB_EMPTY_NODE(&va->rb_node));
- if (free_vmap_cache) {
- if (va->va_end < cached_vstart) {
- free_vmap_cache = NULL;
- } else {
- struct vmap_area *cache;
- cache = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
- if (va->va_start <= cache->va_start) {
- free_vmap_cache = rb_prev(&va->rb_node);
- /*
- * We don't try to update cached_hole_size or
- * cached_align, but it won't go very wrong.
- */
- }
- }
- }
- rb_erase(&va->rb_node, &vmap_area_root);
- RB_CLEAR_NODE(&va->rb_node);
- list_del_rcu(&va->list);
-
/*
- * Track the highest possible candidate for pcpu area
- * allocation. Areas outside of vmalloc area can be returned
- * here too, consider only end addresses which fall inside
- * vmalloc area proper.
+ * Remove from the busy tree/list.
*/
- if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
- vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);
+ unlink_va(va, &vmap_area_root);
- kfree_rcu(va, rcu_head);
+ /*
+ * Merge VA with its neighbors, otherwise just add it.
+ */
+ merge_or_add_vmap_area(va,
+ &free_vmap_area_root, &free_vmap_area_list);
}
/*
@@ -794,8 +1336,6 @@ static struct vmap_area *find_vmap_area(unsigned long addr)
#define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE)
-static bool vmap_initialized __read_mostly = false;
-
struct vmap_block_queue {
spinlock_t lock;
struct list_head free;
@@ -1256,12 +1796,58 @@ void __init vm_area_register_early(struct vm_struct *vm, size_t align)
vm_area_add_early(vm);
}
+static void vmap_init_free_space(void)
+{
+ unsigned long vmap_start = 1;
+ const unsigned long vmap_end = ULONG_MAX;
+ struct vmap_area *busy, *free;
+
+ /*
+ * B F B B B F
+ * -|-----|.....|-----|-----|-----|.....|-
+ * | The KVA space |
+ * |<--------------------------------->|
+ */
+ list_for_each_entry(busy, &vmap_area_list, list) {
+ if (busy->va_start - vmap_start > 0) {
+ free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
+ if (!WARN_ON_ONCE(!free)) {
+ free->va_start = vmap_start;
+ free->va_end = busy->va_start;
+
+ insert_vmap_area_augment(free, NULL,
+ &free_vmap_area_root,
+ &free_vmap_area_list);
+ }
+ }
+
+ vmap_start = busy->va_end;
+ }
+
+ if (vmap_end - vmap_start > 0) {
+ free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
+ if (!WARN_ON_ONCE(!free)) {
+ free->va_start = vmap_start;
+ free->va_end = vmap_end;
+
+ insert_vmap_area_augment(free, NULL,
+ &free_vmap_area_root,
+ &free_vmap_area_list);
+ }
+ }
+}
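The same gap-walk, reduced to arrays (an illustrative sketch): derive the free ranges from a sorted list of busy ranges, exactly the B/F picture in the comment above.

#include <stdio.h>

struct range { unsigned long start, end; };

int main(void)
{
	const struct range busy[] = { {10, 20}, {20, 35}, {50, 60} };
	unsigned long vmap_start = 1, vmap_end = 100;
	unsigned long cursor = vmap_start;
	unsigned int i;

	for (i = 0; i < sizeof(busy) / sizeof(busy[0]); i++) {
		if (busy[i].start > cursor)	/* a hole before this block */
			printf("free [%lu, %lu)\n", cursor, busy[i].start);
		cursor = busy[i].end;
	}
	if (vmap_end > cursor)			/* trailing hole */
		printf("free [%lu, %lu)\n", cursor, vmap_end);
	return 0;
}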
+
void __init vmalloc_init(void)
{
struct vmap_area *va;
struct vm_struct *tmp;
int i;
+ /*
+ * Create the cache for vmap_area objects.
+ */
+ vmap_area_cachep = KMEM_CACHE(vmap_area, SLAB_PANIC);
+
for_each_possible_cpu(i) {
struct vmap_block_queue *vbq;
struct vfree_deferred *p;
@@ -1276,16 +1862,21 @@ void __init vmalloc_init(void)
/* Import existing vmlist entries. */
for (tmp = vmlist; tmp; tmp = tmp->next) {
- va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
+ va = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
+ if (WARN_ON_ONCE(!va))
+ continue;
+
va->flags = VM_VM_AREA;
va->va_start = (unsigned long)tmp->addr;
va->va_end = va->va_start + tmp->size;
va->vm = tmp;
- __insert_vmap_area(va);
+ insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
}
- vmap_area_pcpu_hole = VMALLOC_END;
-
+ /*
+ * Now we can initialize a free vmap space.
+ */
+ vmap_init_free_space();
vmap_initialized = true;
}
@@ -2477,81 +3068,64 @@ static struct vmap_area *node_to_va(struct rb_node *n)
}
/**
- * pvm_find_next_prev - find the next and prev vmap_area surrounding @end
- * @end: target address
- * @pnext: out arg for the next vmap_area
- * @pprev: out arg for the previous vmap_area
- *
- * Returns: %true if either or both of next and prev are found,
- * %false if no vmap_area exists
+ * pvm_find_va_enclose_addr - find the vmap_area @addr belongs to
+ * @addr: target address
*
- * Find vmap_areas end addresses of which enclose @end. ie. if not
- * NULL, *pnext->va_end > @end and *pprev->va_end <= @end.
+ * Returns: the vmap_area if it is found. If there is no such area,
+ *   the highest vmap_area below @addr is returned instead,
+ *   i.e. one with va->va_start < addr && va->va_end < addr,
+ *   or NULL if there is no area before @addr.
*/
-static bool pvm_find_next_prev(unsigned long end,
- struct vmap_area **pnext,
- struct vmap_area **pprev)
+static struct vmap_area *
+pvm_find_va_enclose_addr(unsigned long addr)
{
- struct rb_node *n = vmap_area_root.rb_node;
- struct vmap_area *va = NULL;
+ struct vmap_area *va, *tmp;
+ struct rb_node *n;
+
+ n = free_vmap_area_root.rb_node;
+ va = NULL;
while (n) {
- va = rb_entry(n, struct vmap_area, rb_node);
- if (end < va->va_end)
- n = n->rb_left;
- else if (end > va->va_end)
+ tmp = rb_entry(n, struct vmap_area, rb_node);
+ if (tmp->va_start <= addr) {
+ va = tmp;
+ if (tmp->va_end >= addr)
+ break;
+
n = n->rb_right;
- else
- break;
+ } else {
+ n = n->rb_left;
+ }
}
- if (!va)
- return false;
-
- if (va->va_end > end) {
- *pnext = va;
- *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
- } else {
- *pprev = va;
- *pnext = node_to_va(rb_next(&(*pprev)->rb_node));
- }
- return true;
+ return va;
}
/**
- * pvm_determine_end - find the highest aligned address between two vmap_areas
- * @pnext: in/out arg for the next vmap_area
- * @pprev: in/out arg for the previous vmap_area
- * @align: alignment
+ * pvm_determine_end_from_reverse - find the highest aligned address
+ * of a free block below VMALLOC_END
+ * @va:
+ *   in - the VA we start the search from (reverse order);
+ * out - the VA with the highest aligned end address.
*
- * Returns: determined end address
- *
- * Find the highest aligned address between *@pnext and *@pprev below
- * VMALLOC_END. *@pnext and *@pprev are adjusted so that the aligned
- * down address is between the end addresses of the two vmap_areas.
- *
- * Please note that the address returned by this function may fall
- * inside *@pnext vmap_area. The caller is responsible for checking
- * that.
+ * Returns: determined end address within vmap_area
*/
-static unsigned long pvm_determine_end(struct vmap_area **pnext,
- struct vmap_area **pprev,
- unsigned long align)
+static unsigned long
+pvm_determine_end_from_reverse(struct vmap_area **va, unsigned long align)
{
- const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
+ unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
unsigned long addr;
- if (*pnext)
- addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
- else
- addr = vmalloc_end;
-
- while (*pprev && (*pprev)->va_end > addr) {
- *pnext = *pprev;
- *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
+ if (likely(*va)) {
+ list_for_each_entry_from_reverse((*va),
+ &free_vmap_area_list, list) {
+ addr = min((*va)->va_end & ~(align - 1), vmalloc_end);
+ if ((*va)->va_start < addr)
+ return addr;
+ }
}
- return addr;
+ return 0;
}
/**
@@ -2571,12 +3145,12 @@ static unsigned long pvm_determine_end(struct vmap_area **pnext,
* to gigabytes. To avoid interacting with regular vmallocs, these
* areas are allocated from top.
*
- * Despite its complicated look, this allocator is rather simple. It
- * does everything top-down and scans areas from the end looking for
- * matching slot. While scanning, if any of the areas overlaps with
- * existing vmap_area, the base address is pulled down to fit the
- * area. Scanning is repeated till all the areas fit and then all
- * necessary data structures are inserted and the result is returned.
+ * Despite its complicated look, this allocator is rather simple. It
+ * does everything top-down and scans free blocks from the end looking
+ * for a matching base. While scanning, if any of the areas do not fit,
+ * the base address is pulled down to fit the area. Scanning is repeated till
+ * all the areas fit and then all necessary data structures are inserted
+ * and the result is returned.
*/
struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
const size_t *sizes, int nr_vms,
@@ -2584,11 +3158,12 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
{
const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
- struct vmap_area **vas, *prev, *next;
+ struct vmap_area **vas, *va;
struct vm_struct **vms;
int area, area2, last_area, term_area;
- unsigned long base, start, end, last_end;
+ unsigned long base, start, size, end, last_end;
bool purged = false;
+ enum fit_type type;
/* verify parameters and allocate data structures */
BUG_ON(offset_in_page(align) || !is_power_of_2(align));
@@ -2624,7 +3199,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
goto err_free2;
for (area = 0; area < nr_vms; area++) {
- vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL);
+ vas[area] = kmem_cache_zalloc(vmap_area_cachep, GFP_KERNEL);
vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
if (!vas[area] || !vms[area])
goto err_free;
@@ -2637,49 +3212,29 @@ retry:
start = offsets[area];
end = start + sizes[area];
- if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
- base = vmalloc_end - last_end;
- goto found;
- }
- base = pvm_determine_end(&next, &prev, align) - end;
+ va = pvm_find_va_enclose_addr(vmalloc_end);
+ base = pvm_determine_end_from_reverse(&va, align) - end;
while (true) {
- BUG_ON(next && next->va_end <= base + end);
- BUG_ON(prev && prev->va_end > base + end);
-
/*
* base might have underflowed, add last_end before
* comparing.
*/
- if (base + last_end < vmalloc_start + last_end) {
- spin_unlock(&vmap_area_lock);
- if (!purged) {
- purge_vmap_area_lazy();
- purged = true;
- goto retry;
- }
- goto err_free;
- }
+ if (base + last_end < vmalloc_start + last_end)
+ goto overflow;
/*
- * If next overlaps, move base downwards so that it's
- * right below next and then recheck.
+ * Fitting base has not been found.
*/
- if (next && next->va_start < base + end) {
- base = pvm_determine_end(&next, &prev, align) - end;
- term_area = area;
- continue;
- }
+ if (va == NULL)
+ goto overflow;
/*
- * If prev overlaps, shift down next and prev and move
- * base so that it's right below new next and then
- * recheck.
+ * If this VA does not fit, move base downwards and recheck.
*/
- if (prev && prev->va_end > base + start) {
- next = prev;
- prev = node_to_va(rb_prev(&next->rb_node));
- base = pvm_determine_end(&next, &prev, align) - end;
+ if (base + start < va->va_start || base + end > va->va_end) {
+ va = node_to_va(rb_prev(&va->rb_node));
+ base = pvm_determine_end_from_reverse(&va, align) - end;
term_area = area;
continue;
}
@@ -2691,21 +3246,40 @@ retry:
area = (area + nr_vms - 1) % nr_vms;
if (area == term_area)
break;
+
start = offsets[area];
end = start + sizes[area];
- pvm_find_next_prev(base + end, &next, &prev);
+ va = pvm_find_va_enclose_addr(base + end);
}
-found:
+
/* we've found a fitting base, insert all va's */
for (area = 0; area < nr_vms; area++) {
- struct vmap_area *va = vas[area];
+ int ret;
- va->va_start = base + offsets[area];
- va->va_end = va->va_start + sizes[area];
- __insert_vmap_area(va);
- }
+ start = base + offsets[area];
+ size = sizes[area];
+
+ va = pvm_find_va_enclose_addr(start);
+ if (WARN_ON_ONCE(va == NULL))
+ /* It is a BUG(), but trigger recovery instead. */
+ goto recovery;
+
+ type = classify_va_fit_type(va, start, size);
+ if (WARN_ON_ONCE(type == NOTHING_FIT))
+ /* It is a BUG(), but trigger recovery instead. */
+ goto recovery;
+
+ ret = adjust_va_to_fit_type(va, start, size, type);
+ if (unlikely(ret))
+ goto recovery;
- vmap_area_pcpu_hole = base + offsets[last_area];
+ /* Allocated area. */
+ va = vas[area];
+ va->va_start = start;
+ va->va_end = start + size;
+
+ insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
+ }
spin_unlock(&vmap_area_lock);
@@ -2717,9 +3291,38 @@ found:
kfree(vas);
return vms;
+recovery:
+ /* Remove previously inserted areas. */
+ while (area--) {
+ __free_vmap_area(vas[area]);
+ vas[area] = NULL;
+ }
+
+overflow:
+ spin_unlock(&vmap_area_lock);
+ if (!purged) {
+ purge_vmap_area_lazy();
+ purged = true;
+
+ /* Before "retry", check if we recover. */
+ for (area = 0; area < nr_vms; area++) {
+ if (vas[area])
+ continue;
+
+ vas[area] = kmem_cache_zalloc(
+ vmap_area_cachep, GFP_KERNEL);
+ if (!vas[area])
+ goto err_free;
+ }
+
+ goto retry;
+ }
+
err_free:
for (area = 0; area < nr_vms; area++) {
- kfree(vas[area]);
+ if (vas[area])
+ kmem_cache_free(vmap_area_cachep, vas[area]);
+
kfree(vms[area]);
}
err_free2:
AK 2 #define STT_NOTYPE 0 #define STT_OBJECT 1 #define STT_FUNC 2 #define STT_SECTION 3 #define STT_FILE 4 #define ELF_ST_BIND(x) ((x) >> 4) #define ELF_ST_TYPE(x) (((unsigned int) x) & 0xf) #define ELF_ST_INFO(bind, type) (((bind) << 4) | ((type) & 0xf)) #define ELF32_ST_BIND(x) ELF_ST_BIND(x) #define ELF32_ST_TYPE(x) ELF_ST_TYPE(x) #define ELF64_ST_BIND(x) ELF_ST_BIND(x) #define ELF64_ST_TYPE(x) ELF_ST_TYPE(x) /* Symbolic values for the entries in the auxiliary table put on the initial stack */ #define AT_NULL 0 /* end of vector */ #define AT_IGNORE 1 /* entry should be ignored */ #define AT_EXECFD 2 /* file descriptor of program */ #define AT_PHDR 3 /* program headers for program */ #define AT_PHENT 4 /* size of program header entry */ #define AT_PHNUM 5 /* number of program headers */ #define AT_PAGESZ 6 /* system page size */ #define AT_BASE 7 /* base address of interpreter */ #define AT_FLAGS 8 /* flags */ #define AT_ENTRY 9 /* entry point of program */ #define AT_NOTELF 10 /* program is not ELF */ #define AT_UID 11 /* real uid */ #define AT_EUID 12 /* effective uid */ #define AT_GID 13 /* real gid */ #define AT_EGID 14 /* effective gid */ #define AT_PLATFORM 15 /* string identifying CPU for optimizations */ #define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ #define AT_CLKTCK 17 /* frequency at which times() increments */ #define AT_FPUCW 18 /* info about fpu initialization by kernel */ #define AT_DCACHEBSIZE 19 /* data cache block size */ #define AT_ICACHEBSIZE 20 /* instruction cache block size */ #define AT_UCACHEBSIZE 21 /* unified cache block size */ #define AT_IGNOREPPC 22 /* ppc only; entry should be ignored */ #define AT_SECURE 23 /* boolean, was exec suid-like? */ #define AT_BASE_PLATFORM 24 /* string identifying real platforms */ #define AT_RANDOM 25 /* address of 16 random bytes */ #define AT_HWCAP2 26 /* extension of AT_HWCAP */ #define AT_EXECFN 31 /* filename of the executable */ #define AT_SYSINFO 32 /* address of kernel entry point */ #define AT_SYSINFO_EHDR 33 /* address of kernel vdso */ #define AT_L1I_CACHESHAPE 34 /* shapes of the caches: */ #define AT_L1D_CACHESHAPE 35 /* bits 0-3: cache associativity. */ #define AT_L2_CACHESHAPE 36 /* bits 4-7: log2 of line size. */ #define AT_L3_CACHESHAPE 37 /* val&~255: cache size. */ typedef struct dynamic{ Elf32_Sword d_tag; union{ Elf32_Sword d_val; Elf32_Addr d_ptr; } d_un; } Elf32_Dyn; typedef struct { Elf64_Sxword d_tag; /* entry tag value */ union { Elf64_Xword d_val; Elf64_Addr d_ptr; } d_un; } Elf64_Dyn; /* The following are used with relocations */ #define ELF32_R_SYM(x) ((x) >> 8) #define ELF32_R_TYPE(x) ((x) & 0xff) #define ELF64_R_SYM(i) ((i) >> 32) #define ELF64_R_TYPE(i) ((i) & 0xffffffff) #define ELF64_R_TYPE_DATA(i) (((ELF64_R_TYPE(i) >> 8) ^ 0x00800000) - 0x00800000) #define R_386_NONE 0 #define R_386_32 1 #define R_386_PC32 2 #define R_386_GOT32 3 #define R_386_PLT32 4 #define R_386_COPY 5 #define R_386_GLOB_DAT 6 #define R_386_JMP_SLOT 7 #define R_386_RELATIVE 8 #define R_386_GOTOFF 9 #define R_386_GOTPC 10 #define R_386_NUM 11 /* Not a dynamic reloc, so not included in R_386_NUM. Used in TCG. 
*/ #define R_386_PC8 23 #define R_MIPS_NONE 0 #define R_MIPS_16 1 #define R_MIPS_32 2 #define R_MIPS_REL32 3 #define R_MIPS_26 4 #define R_MIPS_HI16 5 #define R_MIPS_LO16 6 #define R_MIPS_GPREL16 7 #define R_MIPS_LITERAL 8 #define R_MIPS_GOT16 9 #define R_MIPS_PC16 10 #define R_MIPS_CALL16 11 #define R_MIPS_GPREL32 12 /* The remaining relocs are defined on Irix, although they are not in the MIPS ELF ABI. */ #define R_MIPS_UNUSED1 13 #define R_MIPS_UNUSED2 14 #define R_MIPS_UNUSED3 15 #define R_MIPS_SHIFT5 16 #define R_MIPS_SHIFT6 17 #define R_MIPS_64 18 #define R_MIPS_GOT_DISP 19 #define R_MIPS_GOT_PAGE 20 #define R_MIPS_GOT_OFST 21 /* * The following two relocation types are specified in the MIPS ABI * conformance guide version 1.2 but not yet in the psABI. */ #define R_MIPS_GOTHI16 22 #define R_MIPS_GOTLO16 23 #define R_MIPS_SUB 24 #define R_MIPS_INSERT_A 25 #define R_MIPS_INSERT_B 26 #define R_MIPS_DELETE 27 #define R_MIPS_HIGHER 28 #define R_MIPS_HIGHEST 29 /* * The following two relocation types are specified in the MIPS ABI * conformance guide version 1.2 but not yet in the psABI. */ #define R_MIPS_CALLHI16 30 #define R_MIPS_CALLLO16 31 /* * This range is reserved for vendor specific relocations. */ #define R_MIPS_LOVENDOR 100 #define R_MIPS_HIVENDOR 127 /* SUN SPARC specific definitions. */ /* Values for Elf64_Ehdr.e_flags. */ #define EF_SPARCV9_MM 3 #define EF_SPARCV9_TSO 0 #define EF_SPARCV9_PSO 1 #define EF_SPARCV9_RMO 2 #define EF_SPARC_LEDATA 0x800000 /* little endian data */ #define EF_SPARC_EXT_MASK 0xFFFF00 #define EF_SPARC_32PLUS 0x000100 /* generic V8+ features */ #define EF_SPARC_SUN_US1 0x000200 /* Sun UltraSPARC1 extensions */ #define EF_SPARC_HAL_R1 0x000400 /* HAL R1 extensions */ #define EF_SPARC_SUN_US3 0x000800 /* Sun UltraSPARCIII extensions */ /* * Sparc ELF relocation types */ #define R_SPARC_NONE 0 #define R_SPARC_8 1 #define R_SPARC_16 2 #define R_SPARC_32 3 #define R_SPARC_DISP8 4 #define R_SPARC_DISP16 5 #define R_SPARC_DISP32 6 #define R_SPARC_WDISP30 7 #define R_SPARC_WDISP22 8 #define R_SPARC_HI22 9 #define R_SPARC_22 10 #define R_SPARC_13 11 #define R_SPARC_LO10 12 #define R_SPARC_GOT10 13 #define R_SPARC_GOT13 14 #define R_SPARC_GOT22 15 #define R_SPARC_PC10 16 #define R_SPARC_PC22 17 #define R_SPARC_WPLT30 18 #define R_SPARC_COPY 19 #define R_SPARC_GLOB_DAT 20 #define R_SPARC_JMP_SLOT 21 #define R_SPARC_RELATIVE 22 #define R_SPARC_UA32 23 #define R_SPARC_PLT32 24 #define R_SPARC_HIPLT22 25 #define R_SPARC_LOPLT10 26 #define R_SPARC_PCPLT32 27 #define R_SPARC_PCPLT22 28 #define R_SPARC_PCPLT10 29 #define R_SPARC_10 30 #define R_SPARC_11 31 #define R_SPARC_64 32 #define R_SPARC_OLO10 33 #define R_SPARC_HH22 34 #define R_SPARC_HM10 35 #define R_SPARC_LM22 36 #define R_SPARC_WDISP16 40 #define R_SPARC_WDISP19 41 #define R_SPARC_7 43 #define R_SPARC_5 44 #define R_SPARC_6 45 /* Bits present in AT_HWCAP for ARM. 
*/ #define HWCAP_ARM_SWP (1 << 0) #define HWCAP_ARM_HALF (1 << 1) #define HWCAP_ARM_THUMB (1 << 2) #define HWCAP_ARM_26BIT (1 << 3) #define HWCAP_ARM_FAST_MULT (1 << 4) #define HWCAP_ARM_FPA (1 << 5) #define HWCAP_ARM_VFP (1 << 6) #define HWCAP_ARM_EDSP (1 << 7) #define HWCAP_ARM_JAVA (1 << 8) #define HWCAP_ARM_IWMMXT (1 << 9) #define HWCAP_ARM_CRUNCH (1 << 10) #define HWCAP_ARM_THUMBEE (1 << 11) #define HWCAP_ARM_NEON (1 << 12) #define HWCAP_ARM_VFPv3 (1 << 13) #define HWCAP_ARM_VFPv3D16 (1 << 14) /* also set for VFPv4-D16 */ #define HWCAP_ARM_TLS (1 << 15) #define HWCAP_ARM_VFPv4 (1 << 16) #define HWCAP_ARM_IDIVA (1 << 17) #define HWCAP_ARM_IDIVT (1 << 18) #define HWCAP_IDIV (HWCAP_IDIVA | HWCAP_IDIVT) #define HWCAP_VFPD32 (1 << 19) /* set if VFP has 32 regs */ #define HWCAP_LPAE (1 << 20) /* Bits present in AT_HWCAP for PowerPC. */ #define PPC_FEATURE_32 0x80000000 #define PPC_FEATURE_64 0x40000000 #define PPC_FEATURE_601_INSTR 0x20000000 #define PPC_FEATURE_HAS_ALTIVEC 0x10000000 #define PPC_FEATURE_HAS_FPU 0x08000000 #define PPC_FEATURE_HAS_MMU 0x04000000 #define PPC_FEATURE_HAS_4xxMAC 0x02000000 #define PPC_FEATURE_UNIFIED_CACHE 0x01000000 #define PPC_FEATURE_HAS_SPE 0x00800000 #define PPC_FEATURE_HAS_EFP_SINGLE 0x00400000 #define PPC_FEATURE_HAS_EFP_DOUBLE 0x00200000 #define PPC_FEATURE_NO_TB 0x00100000 #define PPC_FEATURE_POWER4 0x00080000 #define PPC_FEATURE_POWER5 0x00040000 #define PPC_FEATURE_POWER5_PLUS 0x00020000 #define PPC_FEATURE_CELL 0x00010000 #define PPC_FEATURE_BOOKE 0x00008000 #define PPC_FEATURE_SMT 0x00004000 #define PPC_FEATURE_ICACHE_SNOOP 0x00002000 #define PPC_FEATURE_ARCH_2_05 0x00001000 #define PPC_FEATURE_PA6T 0x00000800 #define PPC_FEATURE_HAS_DFP 0x00000400 #define PPC_FEATURE_POWER6_EXT 0x00000200 #define PPC_FEATURE_ARCH_2_06 0x00000100 #define PPC_FEATURE_HAS_VSX 0x00000080 #define PPC_FEATURE_PSERIES_PERFMON_COMPAT \ 0x00000040 #define PPC_FEATURE_TRUE_LE 0x00000002 #define PPC_FEATURE_PPC_LE 0x00000001 /* Bits present in AT_HWCAP2 for PowerPC. */ #define PPC_FEATURE2_ARCH_2_07 0x80000000 #define PPC_FEATURE2_HAS_HTM 0x40000000 #define PPC_FEATURE2_HAS_DSCR 0x20000000 #define PPC_FEATURE2_HAS_EBB 0x10000000 #define PPC_FEATURE2_HAS_ISEL 0x08000000 #define PPC_FEATURE2_HAS_TAR 0x04000000 #define PPC_FEATURE2_HAS_VEC_CRYPTO 0x02000000 #define PPC_FEATURE2_HTM_NOSC 0x01000000 #define PPC_FEATURE2_ARCH_3_00 0x00800000 #define PPC_FEATURE2_HAS_IEEE128 0x00400000 #define PPC_FEATURE2_ARCH_3_10 0x00040000 /* Bits present in AT_HWCAP for Sparc. 
*/ #define HWCAP_SPARC_FLUSH 0x00000001 #define HWCAP_SPARC_STBAR 0x00000002 #define HWCAP_SPARC_SWAP 0x00000004 #define HWCAP_SPARC_MULDIV 0x00000008 #define HWCAP_SPARC_V9 0x00000010 #define HWCAP_SPARC_ULTRA3 0x00000020 #define HWCAP_SPARC_BLKINIT 0x00000040 #define HWCAP_SPARC_N2 0x00000080 #define HWCAP_SPARC_MUL32 0x00000100 #define HWCAP_SPARC_DIV32 0x00000200 #define HWCAP_SPARC_FSMULD 0x00000400 #define HWCAP_SPARC_V8PLUS 0x00000800 #define HWCAP_SPARC_POPC 0x00001000 #define HWCAP_SPARC_VIS 0x00002000 #define HWCAP_SPARC_VIS2 0x00004000 #define HWCAP_SPARC_ASI_BLK_INIT 0x00008000 #define HWCAP_SPARC_FMAF 0x00010000 #define HWCAP_SPARC_VIS3 0x00020000 #define HWCAP_SPARC_HPC 0x00040000 #define HWCAP_SPARC_RANDOM 0x00080000 #define HWCAP_SPARC_TRANS 0x00100000 #define HWCAP_SPARC_FJFMAU 0x00200000 #define HWCAP_SPARC_IMA 0x00400000 #define HWCAP_SPARC_ASI_CACHE_SPARING 0x00800000 #define HWCAP_SPARC_PAUSE 0x01000000 #define HWCAP_SPARC_CBCOND 0x02000000 #define HWCAP_SPARC_CRYPTO 0x04000000 /* Bits present in AT_HWCAP for s390. */ #define HWCAP_S390_NR_ESAN3 0 #define HWCAP_S390_NR_ZARCH 1 #define HWCAP_S390_NR_STFLE 2 #define HWCAP_S390_NR_MSA 3 #define HWCAP_S390_NR_LDISP 4 #define HWCAP_S390_NR_EIMM 5 #define HWCAP_S390_NR_DFP 6 #define HWCAP_S390_NR_HPAGE 7 #define HWCAP_S390_NR_ETF3EH 8 #define HWCAP_S390_NR_HIGH_GPRS 9 #define HWCAP_S390_NR_TE 10 #define HWCAP_S390_NR_VXRS 11 #define HWCAP_S390_NR_VXRS_BCD 12 #define HWCAP_S390_NR_VXRS_EXT 13 #define HWCAP_S390_NR_GS 14 #define HWCAP_S390_NR_VXRS_EXT2 15 #define HWCAP_S390_NR_VXRS_PDE 16 #define HWCAP_S390_NR_SORT 17 #define HWCAP_S390_NR_DFLT 18 #define HWCAP_S390_NR_VXRS_PDE2 19 #define HWCAP_S390_NR_NNPA 20 #define HWCAP_S390_NR_PCI_MIO 21 #define HWCAP_S390_NR_SIE 22 #define HWCAP_S390_ESAN3 (1 << HWCAP_S390_NR_ESAN3) #define HWCAP_S390_ZARCH (1 << HWCAP_S390_NR_ZARCH) #define HWCAP_S390_STFLE (1 << HWCAP_S390_NR_STFLE) #define HWCAP_S390_MSA (1 << HWCAP_S390_NR_MSA) #define HWCAP_S390_LDISP (1 << HWCAP_S390_NR_LDISP) #define HWCAP_S390_EIMM (1 << HWCAP_S390_NR_EIMM) #define HWCAP_S390_DFP (1 << HWCAP_S390_NR_DFP) #define HWCAP_S390_HPAGE (1 << HWCAP_S390_NR_HPAGE) #define HWCAP_S390_ETF3EH (1 << HWCAP_S390_NR_ETF3EH) #define HWCAP_S390_HIGH_GPRS (1 << HWCAP_S390_NR_HIGH_GPRS) #define HWCAP_S390_TE (1 << HWCAP_S390_NR_TE) #define HWCAP_S390_VXRS (1 << HWCAP_S390_NR_VXRS) #define HWCAP_S390_VXRS_BCD (1 << HWCAP_S390_NR_VXRS_BCD) #define HWCAP_S390_VXRS_EXT (1 << HWCAP_S390_NR_VXRS_EXT) #define HWCAP_S390_GS (1 << HWCAP_S390_NR_GS) #define HWCAP_S390_VXRS_EXT2 (1 << HWCAP_S390_NR_VXRS_EXT2) #define HWCAP_S390_VXRS_PDE (1 << HWCAP_S390_NR_VXRS_PDE) #define HWCAP_S390_SORT (1 << HWCAP_S390_NR_SORT) #define HWCAP_S390_DFLT (1 << HWCAP_S390_NR_DFLT) #define HWCAP_S390_VXRS_PDE2 (1 << HWCAP_S390_NR_VXRS_PDE2) #define HWCAP_S390_NNPA (1 << HWCAP_S390_NR_NNPA) #define HWCAP_S390_PCI_MIO (1 << HWCAP_S390_NR_PCI_MIO) #define HWCAP_S390_SIE (1 << HWCAP_S390_NR_SIE) /* M68K specific definitions. */ /* We use the top 24 bits to encode information about the architecture variant. */ #define EF_M68K_CPU32 0x00810000 #define EF_M68K_M68000 0x01000000 #define EF_M68K_CFV4E 0x00008000 #define EF_M68K_FIDO 0x02000000 #define EF_M68K_ARCH_MASK \ (EF_M68K_M68000 | EF_M68K_CPU32 | EF_M68K_CFV4E | EF_M68K_FIDO) /* We use the bottom 8 bits to encode information about the coldfire variant. If we use any of these bits, the top 24 bits are either 0 or EF_M68K_CFV4E. 
*/ #define EF_M68K_CF_ISA_MASK 0x0F /* Which ISA */ #define EF_M68K_CF_ISA_A_NODIV 0x01 /* ISA A except for div */ #define EF_M68K_CF_ISA_A 0x02 #define EF_M68K_CF_ISA_A_PLUS 0x03 #define EF_M68K_CF_ISA_B_NOUSP 0x04 /* ISA_B except for USP */ #define EF_M68K_CF_ISA_B 0x05 #define EF_M68K_CF_ISA_C 0x06 #define EF_M68K_CF_ISA_C_NODIV 0x07 /* ISA C except for div */ #define EF_M68K_CF_MAC_MASK 0x30 #define EF_M68K_CF_MAC 0x10 /* MAC */ #define EF_M68K_CF_EMAC 0x20 /* EMAC */ #define EF_M68K_CF_EMAC_B 0x30 /* EMAC_B */ #define EF_M68K_CF_FLOAT 0x40 /* Has float insns */ #define EF_M68K_CF_MASK 0xFF /* * 68k ELF relocation types */ #define R_68K_NONE 0 #define R_68K_32 1 #define R_68K_16 2 #define R_68K_8 3 #define R_68K_PC32 4 #define R_68K_PC16 5 #define R_68K_PC8 6 #define R_68K_GOT32 7 #define R_68K_GOT16 8 #define R_68K_GOT8 9 #define R_68K_GOT32O 10 #define R_68K_GOT16O 11 #define R_68K_GOT8O 12 #define R_68K_PLT32 13 #define R_68K_PLT16 14 #define R_68K_PLT8 15 #define R_68K_PLT32O 16 #define R_68K_PLT16O 17 #define R_68K_PLT8O 18 #define R_68K_COPY 19 #define R_68K_GLOB_DAT 20 #define R_68K_JMP_SLOT 21 #define R_68K_RELATIVE 22 /* * Alpha ELF relocation types */ #define R_ALPHA_NONE 0 /* No reloc */ #define R_ALPHA_REFLONG 1 /* Direct 32 bit */ #define R_ALPHA_REFQUAD 2 /* Direct 64 bit */ #define R_ALPHA_GPREL32 3 /* GP relative 32 bit */ #define R_ALPHA_LITERAL 4 /* GP relative 16 bit w/optimization */ #define R_ALPHA_LITUSE 5 /* Optimization hint for LITERAL */ #define R_ALPHA_GPDISP 6 /* Add displacement to GP */ #define R_ALPHA_BRADDR 7 /* PC+4 relative 23 bit shifted */ #define R_ALPHA_HINT 8 /* PC+4 relative 16 bit shifted */ #define R_ALPHA_SREL16 9 /* PC relative 16 bit */ #define R_ALPHA_SREL32 10 /* PC relative 32 bit */ #define R_ALPHA_SREL64 11 /* PC relative 64 bit */ #define R_ALPHA_GPRELHIGH 17 /* GP relative 32 bit, high 16 bits */ #define R_ALPHA_GPRELLOW 18 /* GP relative 32 bit, low 16 bits */ #define R_ALPHA_GPREL16 19 /* GP relative 16 bit */ #define R_ALPHA_COPY 24 /* Copy symbol at runtime */ #define R_ALPHA_GLOB_DAT 25 /* Create GOT entry */ #define R_ALPHA_JMP_SLOT 26 /* Create PLT entry */ #define R_ALPHA_RELATIVE 27 /* Adjust by program base */ #define R_ALPHA_BRSGP 28 #define R_ALPHA_TLSGD 29 #define R_ALPHA_TLS_LDM 30 #define R_ALPHA_DTPMOD64 31 #define R_ALPHA_GOTDTPREL 32 #define R_ALPHA_DTPREL64 33 #define R_ALPHA_DTPRELHI 34 #define R_ALPHA_DTPRELLO 35 #define R_ALPHA_DTPREL16 36 #define R_ALPHA_GOTTPREL 37 #define R_ALPHA_TPREL64 38 #define R_ALPHA_TPRELHI 39 #define R_ALPHA_TPRELLO 40 #define R_ALPHA_TPREL16 41 #define SHF_ALPHA_GPREL 0x10000000 /* PowerPC specific definitions. */ /* Processor specific flags for the ELF header e_flags field. */ #define EF_PPC64_ABI 0x3 /* PowerPC relocations defined by the ABIs */ #define R_PPC_NONE 0 #define R_PPC_ADDR32 1 /* 32bit absolute address */ #define R_PPC_ADDR24 2 /* 26bit address, 2 bits ignored. 
/*
 * 68k ELF relocation types
 */
#define R_68K_NONE      0
#define R_68K_32        1
#define R_68K_16        2
#define R_68K_8         3
#define R_68K_PC32      4
#define R_68K_PC16      5
#define R_68K_PC8       6
#define R_68K_GOT32     7
#define R_68K_GOT16     8
#define R_68K_GOT8      9
#define R_68K_GOT32O    10
#define R_68K_GOT16O    11
#define R_68K_GOT8O     12
#define R_68K_PLT32     13
#define R_68K_PLT16     14
#define R_68K_PLT8      15
#define R_68K_PLT32O    16
#define R_68K_PLT16O    17
#define R_68K_PLT8O     18
#define R_68K_COPY      19
#define R_68K_GLOB_DAT  20
#define R_68K_JMP_SLOT  21
#define R_68K_RELATIVE  22

/*
 * Alpha ELF relocation types
 */
#define R_ALPHA_NONE            0       /* No reloc */
#define R_ALPHA_REFLONG         1       /* Direct 32 bit */
#define R_ALPHA_REFQUAD         2       /* Direct 64 bit */
#define R_ALPHA_GPREL32         3       /* GP relative 32 bit */
#define R_ALPHA_LITERAL         4       /* GP relative 16 bit w/optimization */
#define R_ALPHA_LITUSE          5       /* Optimization hint for LITERAL */
#define R_ALPHA_GPDISP          6       /* Add displacement to GP */
#define R_ALPHA_BRADDR          7       /* PC+4 relative 23 bit shifted */
#define R_ALPHA_HINT            8       /* PC+4 relative 16 bit shifted */
#define R_ALPHA_SREL16          9       /* PC relative 16 bit */
#define R_ALPHA_SREL32          10      /* PC relative 32 bit */
#define R_ALPHA_SREL64          11      /* PC relative 64 bit */
#define R_ALPHA_GPRELHIGH       17      /* GP relative 32 bit, high 16 bits */
#define R_ALPHA_GPRELLOW        18      /* GP relative 32 bit, low 16 bits */
#define R_ALPHA_GPREL16         19      /* GP relative 16 bit */
#define R_ALPHA_COPY            24      /* Copy symbol at runtime */
#define R_ALPHA_GLOB_DAT        25      /* Create GOT entry */
#define R_ALPHA_JMP_SLOT        26      /* Create PLT entry */
#define R_ALPHA_RELATIVE        27      /* Adjust by program base */
#define R_ALPHA_BRSGP           28
#define R_ALPHA_TLSGD           29
#define R_ALPHA_TLS_LDM         30
#define R_ALPHA_DTPMOD64        31
#define R_ALPHA_GOTDTPREL       32
#define R_ALPHA_DTPREL64        33
#define R_ALPHA_DTPRELHI        34
#define R_ALPHA_DTPRELLO        35
#define R_ALPHA_DTPREL16        36
#define R_ALPHA_GOTTPREL        37
#define R_ALPHA_TPREL64         38
#define R_ALPHA_TPRELHI         39
#define R_ALPHA_TPRELLO         40
#define R_ALPHA_TPREL16         41

#define SHF_ALPHA_GPREL         0x10000000

/* PowerPC specific definitions. */

/* Processor specific flags for the ELF header e_flags field. */
#define EF_PPC64_ABI            0x3

/* PowerPC relocations defined by the ABIs */
#define R_PPC_NONE              0
#define R_PPC_ADDR32            1       /* 32bit absolute address */
#define R_PPC_ADDR24            2       /* 26bit address, 2 bits ignored. */
#define R_PPC_ADDR16            3       /* 16bit absolute address */
#define R_PPC_ADDR16_LO         4       /* lower 16bit of absolute address */
#define R_PPC_ADDR16_HI         5       /* high 16bit of absolute address */
#define R_PPC_ADDR16_HA         6       /* adjusted high 16bit */
#define R_PPC_ADDR14            7       /* 16bit address, 2 bits ignored */
#define R_PPC_ADDR14_BRTAKEN    8
#define R_PPC_ADDR14_BRNTAKEN   9
#define R_PPC_REL24             10      /* PC relative 26 bit */
#define R_PPC_REL14             11      /* PC relative 16 bit */
#define R_PPC_REL14_BRTAKEN     12
#define R_PPC_REL14_BRNTAKEN    13
#define R_PPC_GOT16             14
#define R_PPC_GOT16_LO          15
#define R_PPC_GOT16_HI          16
#define R_PPC_GOT16_HA          17
#define R_PPC_PLTREL24          18
#define R_PPC_COPY              19
#define R_PPC_GLOB_DAT          20
#define R_PPC_JMP_SLOT          21
#define R_PPC_RELATIVE          22
#define R_PPC_LOCAL24PC         23
#define R_PPC_UADDR32           24
#define R_PPC_UADDR16           25
#define R_PPC_REL32             26
#define R_PPC_PLT32             27
#define R_PPC_PLTREL32          28
#define R_PPC_PLT16_LO          29
#define R_PPC_PLT16_HI          30
#define R_PPC_PLT16_HA          31
#define R_PPC_SDAREL16          32
#define R_PPC_SECTOFF           33
#define R_PPC_SECTOFF_LO        34
#define R_PPC_SECTOFF_HI        35
#define R_PPC_SECTOFF_HA        36
/* Keep this the last entry. */
#ifndef R_PPC_NUM
#define R_PPC_NUM               37
#endif
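/*
 * Illustrative sketch, not part of the header above: the "adjusted high
 * 16bit" of R_PPC_ADDR16_HA compensates for the companion R_PPC_ADDR16_LO
 * half being sign-extended by instructions such as addi, so the high half
 * must be rounded up whenever bit 15 of the value is set.  ppc_hi() and
 * ppc_ha() are hypothetical helpers showing the usual computation.
 */
#include <stdint.h>

static uint16_t ppc_hi(uint32_t v)      /* R_PPC_ADDR16_HI */
{
        return (uint16_t)(v >> 16);
}

static uint16_t ppc_ha(uint32_t v)      /* R_PPC_ADDR16_HA */
{
        /* Round the high half up when the low half will sign-extend. */
        return (uint16_t)((v + 0x8000) >> 16);
}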
/* ARM specific declarations */

/* Processor specific flags for the ELF header e_flags field. */
#define EF_ARM_RELEXEC          0x01
#define EF_ARM_HASENTRY         0x02
#define EF_ARM_INTERWORK        0x04
#define EF_ARM_APCS_26          0x08
#define EF_ARM_APCS_FLOAT       0x10
#define EF_ARM_PIC              0x20
#define EF_ALIGN8               0x40    /* 8-bit structure alignment is in use */
#define EF_NEW_ABI              0x80
#define EF_OLD_ABI              0x100
#define EF_ARM_SOFT_FLOAT       0x200
#define EF_ARM_VFP_FLOAT        0x400
#define EF_ARM_MAVERICK_FLOAT   0x800

/* Other constants defined in the ARM ELF spec. version B-01. */
#define EF_ARM_SYMSARESORTED    0x04    /* NB conflicts with EF_INTERWORK */
#define EF_ARM_DYNSYMSUSESEGIDX 0x08    /* NB conflicts with EF_APCS26 */
#define EF_ARM_MAPSYMSFIRST     0x10    /* NB conflicts with EF_APCS_FLOAT */
#define EF_ARM_EABIMASK         0xFF000000

/* Constants defined in AAELF. */
#define EF_ARM_BE8              0x00800000
#define EF_ARM_LE8              0x00400000

#define EF_ARM_EABI_VERSION(flags)      ((flags) & EF_ARM_EABIMASK)
#define EF_ARM_EABI_UNKNOWN     0x00000000
#define EF_ARM_EABI_VER1        0x01000000
#define EF_ARM_EABI_VER2        0x02000000
#define EF_ARM_EABI_VER3        0x03000000
#define EF_ARM_EABI_VER4        0x04000000
#define EF_ARM_EABI_VER5        0x05000000

/* Additional symbol types for Thumb */
#define STT_ARM_TFUNC           0xd

/* ARM-specific values for sh_flags */
#define SHF_ARM_ENTRYSECT       0x10000000      /* Section contains an entry point */
#define SHF_ARM_COMDEF          0x80000000      /* Section may be multiply defined
                                                   in the input to a link step */

/* ARM-specific program header flags */
#define PF_ARM_SB               0x10000000      /* Segment contains the location
                                                   addressed by the static base */

/* ARM relocs. */
#define R_ARM_NONE              0       /* No reloc */
#define R_ARM_PC24              1       /* PC relative 26 bit branch */
#define R_ARM_ABS32             2       /* Direct 32 bit */
#define R_ARM_REL32             3       /* PC relative 32 bit */
#define R_ARM_PC13              4
#define R_ARM_ABS16             5       /* Direct 16 bit */
#define R_ARM_ABS12             6       /* Direct 12 bit */
#define R_ARM_THM_ABS5          7
#define R_ARM_ABS8              8       /* Direct 8 bit */
#define R_ARM_SBREL32           9
#define R_ARM_THM_PC22          10
#define R_ARM_THM_PC8           11
#define R_ARM_AMP_VCALL9        12
#define R_ARM_SWI24             13
#define R_ARM_THM_SWI8          14
#define R_ARM_XPC25             15
#define R_ARM_THM_XPC22         16
#define R_ARM_COPY              20      /* Copy symbol at runtime */
#define R_ARM_GLOB_DAT          21      /* Create GOT entry */
#define R_ARM_JUMP_SLOT         22      /* Create PLT entry */
#define R_ARM_RELATIVE          23      /* Adjust by program base */
#define R_ARM_GOTOFF            24      /* 32 bit offset to GOT */
#define R_ARM_GOTPC             25      /* 32 bit PC relative offset to GOT */
#define R_ARM_GOT32             26      /* 32 bit GOT entry */
#define R_ARM_PLT32             27      /* 32 bit PLT address */
#define R_ARM_CALL              28
#define R_ARM_JUMP24            29
#define R_ARM_GNU_VTENTRY       100
#define R_ARM_GNU_VTINHERIT     101
#define R_ARM_THM_PC11          102     /* thumb unconditional branch */
#define R_ARM_THM_PC9           103     /* thumb conditional branch */
#define R_ARM_RXPC25            249
#define R_ARM_RSBREL32          250
#define R_ARM_THM_RPC22         251
#define R_ARM_RREL32            252
#define R_ARM_RABS22            253
#define R_ARM_RPC24             254
#define R_ARM_RBASE             255
/* Keep this the last entry. */
#define R_ARM_NUM               256
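/*
 * Illustrative sketch, not part of the header above: EF_ARM_EABI_VERSION()
 * masks e_flags with EF_ARM_EABIMASK, so the result compares directly
 * against the EF_ARM_EABI_VER* constants defined above.  is_eabi5() is a
 * hypothetical helper; modern arm-linux binaries carry EF_ARM_EABI_VER5.
 */
static int is_eabi5(unsigned int e_flags)
{
        return EF_ARM_EABI_VERSION(e_flags) == EF_ARM_EABI_VER5;
}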
/* ARM Aarch64 relocation types */
#define R_AARCH64_NONE                  256     /* also accepts R_ARM_NONE (0) */
/* static data relocations */
#define R_AARCH64_ABS64                 257
#define R_AARCH64_ABS32                 258
#define R_AARCH64_ABS16                 259
#define R_AARCH64_PREL64                260
#define R_AARCH64_PREL32                261
#define R_AARCH64_PREL16                262
/* static aarch64 group relocations */
/* group relocs to create unsigned data value or address inline */
#define R_AARCH64_MOVW_UABS_G0          263
#define R_AARCH64_MOVW_UABS_G0_NC       264
#define R_AARCH64_MOVW_UABS_G1          265
#define R_AARCH64_MOVW_UABS_G1_NC       266
#define R_AARCH64_MOVW_UABS_G2          267
#define R_AARCH64_MOVW_UABS_G2_NC       268
#define R_AARCH64_MOVW_UABS_G3          269
/* group relocs to create signed data or offset value inline */
#define R_AARCH64_MOVW_SABS_G0          270
#define R_AARCH64_MOVW_SABS_G1          271
#define R_AARCH64_MOVW_SABS_G2          272
/* relocs to generate 19, 21, and 33 bit PC-relative addresses */
#define R_AARCH64_LD_PREL_LO19          273
#define R_AARCH64_ADR_PREL_LO21         274
#define R_AARCH64_ADR_PREL_PG_HI21      275
#define R_AARCH64_ADR_PREL_PG_HI21_NC   276
#define R_AARCH64_ADD_ABS_LO12_NC       277
#define R_AARCH64_LDST8_ABS_LO12_NC     278
#define R_AARCH64_LDST16_ABS_LO12_NC    284
#define R_AARCH64_LDST32_ABS_LO12_NC    285
#define R_AARCH64_LDST64_ABS_LO12_NC    286
#define R_AARCH64_LDST128_ABS_LO12_NC   299
/* relocs for control-flow - all offsets as multiple of 4 */
#define R_AARCH64_TSTBR14               279
#define R_AARCH64_CONDBR19              280
#define R_AARCH64_JUMP26                282
#define R_AARCH64_CALL26                283
/* group relocs to create pc-relative offset inline */
#define R_AARCH64_MOVW_PREL_G0          287
#define R_AARCH64_MOVW_PREL_G0_NC       288
#define R_AARCH64_MOVW_PREL_G1          289
#define R_AARCH64_MOVW_PREL_G1_NC       290
#define R_AARCH64_MOVW_PREL_G2          291
#define R_AARCH64_MOVW_PREL_G2_NC       292
#define R_AARCH64_MOVW_PREL_G3          293
/* group relocs to create a GOT-relative offset inline */
#define R_AARCH64_MOVW_GOTOFF_G0        300
#define R_AARCH64_MOVW_GOTOFF_G0_NC     301
#define R_AARCH64_MOVW_GOTOFF_G1        302
#define R_AARCH64_MOVW_GOTOFF_G1_NC     303
#define R_AARCH64_MOVW_GOTOFF_G2        304
#define R_AARCH64_MOVW_GOTOFF_G2_NC     305
#define R_AARCH64_MOVW_GOTOFF_G3        306
/* GOT-relative data relocs */
#define R_AARCH64_GOTREL64              307
#define R_AARCH64_GOTREL32              308
/* GOT-relative instr relocs */
#define R_AARCH64_GOT_LD_PREL19         309
#define R_AARCH64_LD64_GOTOFF_LO15      310
#define R_AARCH64_ADR_GOT_PAGE          311
#define R_AARCH64_LD64_GOT_LO12_NC      312
#define R_AARCH64_LD64_GOTPAGE_LO15     313
/* General Dynamic TLS relocations */
#define R_AARCH64_TLSGD_ADR_PREL21      512
#define R_AARCH64_TLSGD_ADR_PAGE21      513
#define R_AARCH64_TLSGD_ADD_LO12_NC     514
#define R_AARCH64_TLSGD_MOVW_G1         515
#define R_AARCH64_TLSGD_MOVW_G0_NC      516
/* Local Dynamic TLS relocations */
#define R_AARCH64_TLSLD_ADR_PREL21      517
#define R_AARCH64_TLSLD_ADR_PAGE21      518
#define R_AARCH64_TLSLD_ADD_LO12_NC     519
#define R_AARCH64_TLSLD_MOVW_G1         520
#define R_AARCH64_TLSLD_MOVW_G0_NC      521
#define R_AARCH64_TLSLD_LD_PREL19       522
#define R_AARCH64_TLSLD_MOVW_DTPREL_G2  523
#define R_AARCH64_TLSLD_MOVW_DTPREL_G1  524
#define R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC 525
#define R_AARCH64_TLSLD_MOVW_DTPREL_G0  526
#define R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC 527
#define R_AARCH64_TLSLD_ADD_DTPREL_HI12 528
#define R_AARCH64_TLSLD_ADD_DTPREL_LO12 529
#define R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC 530
#define R_AARCH64_TLSLD_LDST8_DTPREL_LO12 531
#define R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC 532
#define R_AARCH64_TLSLD_LDST16_DTPREL_LO12 533
#define R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC 534
#define R_AARCH64_TLSLD_LDST32_DTPREL_LO12 535
#define R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC 536
#define R_AARCH64_TLSLD_LDST64_DTPREL_LO12 537
#define R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC 538
/* initial exec TLS relocations */
#define R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 539
#define R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC 540
#define R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 541
#define R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC 542
#define R_AARCH64_TLSIE_LD_GOTTPREL_PREL19 543
/* local exec TLS relocations */
#define R_AARCH64_TLSLE_MOVW_TPREL_G2   544
#define R_AARCH64_TLSLE_MOVW_TPREL_G1   545
#define R_AARCH64_TLSLE_MOVW_TPREL_G1_NC 546
#define R_AARCH64_TLSLE_MOVW_TPREL_G0   547
#define R_AARCH64_TLSLE_MOVW_TPREL_G0_NC 548
#define R_AARCH64_TLSLE_ADD_TPREL_HI12  549
#define R_AARCH64_TLSLE_ADD_TPREL_LO12  550
#define R_AARCH64_TLSLE_ADD_TPREL_LO12_NC 551
#define R_AARCH64_TLSLE_LDST8_TPREL_LO12 552
#define R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC 553
#define R_AARCH64_TLSLE_LDST16_TPREL_LO12 554
#define R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC 555
#define R_AARCH64_TLSLE_LDST32_TPREL_LO12 556
#define R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC 557
#define R_AARCH64_TLSLE_LDST64_TPREL_LO12 558
#define R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC 559
/* Dynamic Relocations */
#define R_AARCH64_COPY          1024
#define R_AARCH64_GLOB_DAT      1025
#define R_AARCH64_JUMP_SLOT     1026
#define R_AARCH64_RELATIVE      1027
#define R_AARCH64_TLS_DTPREL64  1028
#define R_AARCH64_TLS_DTPMOD64  1029
#define R_AARCH64_TLS_TPREL64   1030
#define R_AARCH64_TLS_DTPREL32  1031
#define R_AARCH64_TLS_DTPMOD32  1032
#define R_AARCH64_TLS_TPREL32   1033
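/*
 * Illustrative sketch, not part of the header above: how a loader might
 * apply the dynamic R_AARCH64_RELATIVE relocation, which stores the load
 * base plus the relocation addend at the target address.  This assumes
 * the standard Elf64_Rela layout and the ELF64_R_TYPE() accessor from
 * <elf.h>; apply_relative_relocs() is a hypothetical helper.
 */
#include <elf.h>
#include <stddef.h>
#include <stdint.h>

static void apply_relative_relocs(uintptr_t base,
                                  const Elf64_Rela *rela, size_t count)
{
        for (size_t i = 0; i < count; i++) {
                if (ELF64_R_TYPE(rela[i].r_info) != R_AARCH64_RELATIVE)
                        continue;
                /* Target word becomes load base + addend. */
                uint64_t *where = (uint64_t *)(base + rela[i].r_offset);
                *where = base + (uint64_t)rela[i].r_addend;
        }
}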
/* s390 relocations defined by the ABIs */
#define R_390_NONE              0       /* No reloc. */
#define R_390_8                 1       /* Direct 8 bit. */
#define R_390_12                2       /* Direct 12 bit. */
#define R_390_16                3       /* Direct 16 bit. */
#define R_390_32                4       /* Direct 32 bit. */
#define R_390_PC32              5       /* PC relative 32 bit. */
#define R_390_GOT12             6       /* 12 bit GOT offset. */
#define R_390_GOT32             7       /* 32 bit GOT offset. */
#define R_390_PLT32             8       /* 32 bit PC relative PLT address. */
#define R_390_COPY              9       /* Copy symbol at runtime. */
#define R_390_GLOB_DAT          10      /* Create GOT entry. */
#define R_390_JMP_SLOT          11      /* Create PLT entry. */
#define R_390_RELATIVE          12      /* Adjust by program base. */
#define R_390_GOTOFF32          13      /* 32 bit offset to GOT. */
#define R_390_GOTPC             14      /* 32 bit PC rel. offset to GOT. */
#define R_390_GOT16             15      /* 16 bit GOT offset. */
#define R_390_PC16              16      /* PC relative 16 bit. */
#define R_390_PC16DBL           17      /* PC relative 16 bit shifted by 1. */
#define R_390_PLT16DBL          18      /* 16 bit PC rel. PLT shifted by 1. */
#define R_390_PC32DBL           19      /* PC relative 32 bit shifted by 1. */
#define R_390_PLT32DBL          20      /* 32 bit PC rel. PLT shifted by 1. */
#define R_390_GOTPCDBL          21      /* 32 bit PC rel. GOT shifted by 1. */
#define R_390_64                22      /* Direct 64 bit. */
#define R_390_PC64              23      /* PC relative 64 bit. */
#define R_390_GOT64             24      /* 64 bit GOT offset. */
#define R_390_PLT64             25      /* 64 bit PC relative PLT address. */
#define R_390_GOTENT            26      /* 32 bit PC rel. to GOT entry >> 1. */
#define R_390_GOTOFF16          27      /* 16 bit offset to GOT. */
#define R_390_GOTOFF64          28      /* 64 bit offset to GOT. */
#define R_390_GOTPLT12          29      /* 12 bit offset to jump slot. */
#define R_390_GOTPLT16          30      /* 16 bit offset to jump slot. */
#define R_390_GOTPLT32          31      /* 32 bit offset to jump slot. */
#define R_390_GOTPLT64          32      /* 64 bit offset to jump slot. */
#define R_390_GOTPLTENT         33      /* 32 bit rel. offset to jump slot. */
#define R_390_PLTOFF16          34      /* 16 bit offset from GOT to PLT. */
#define R_390_PLTOFF32          35      /* 32 bit offset from GOT to PLT. */
#define R_390_PLTOFF64          36      /* 64 bit offset from GOT to PLT. */
#define R_390_TLS_LOAD          37      /* Tag for load insn in TLS code. */
#define R_390_TLS_GDCALL        38      /* Tag for function call in general
                                           dynamic TLS code. */
#define R_390_TLS_LDCALL        39      /* Tag for function call in local
                                           dynamic TLS code. */
#define R_390_TLS_GD32          40      /* Direct 32 bit for general dynamic
                                           thread local data. */
#define R_390_TLS_GD64          41      /* Direct 64 bit for general dynamic
                                           thread local data. */
#define R_390_TLS_GOTIE12       42      /* 12 bit GOT offset for static TLS
                                           block offset. */
#define R_390_TLS_GOTIE32       43      /* 32 bit GOT offset for static TLS
                                           block offset. */
#define R_390_TLS_GOTIE64       44      /* 64 bit GOT offset for static TLS
                                           block offset. */
#define R_390_TLS_LDM32         45      /* Direct 32 bit for local dynamic
                                           thread local data in LD code. */
#define R_390_TLS_LDM64         46      /* Direct 64 bit for local dynamic
                                           thread local data in LD code. */
#define R_390_TLS_IE32          47      /* 32 bit address of GOT entry for
                                           negated static TLS block offset. */
#define R_390_TLS_IE64          48      /* 64 bit address of GOT entry for
                                           negated static TLS block offset. */
#define R_390_TLS_IEENT         49      /* 32 bit rel. offset to GOT entry for
                                           negated static TLS block offset. */
#define R_390_TLS_LE32          50      /* 32 bit negated offset relative to
                                           static TLS block. */
#define R_390_TLS_LE64          51      /* 64 bit negated offset relative to
                                           static TLS block. */
#define R_390_TLS_LDO32         52      /* 32 bit offset relative to TLS
                                           block. */
#define R_390_TLS_LDO64         53      /* 64 bit offset relative to TLS
                                           block. */
#define R_390_TLS_DTPMOD        54      /* ID of module containing symbol. */
#define R_390_TLS_DTPOFF        55      /* Offset in TLS block. */
#define R_390_TLS_TPOFF         56      /* Negated offset in static TLS
                                           block. */
#define R_390_20                57
/* Keep this the last entry. */
#define R_390_NUM               58

/* x86-64 relocation types */
#define R_X86_64_NONE           0       /* No reloc */
#define R_X86_64_64             1       /* Direct 64 bit */
#define R_X86_64_PC32           2       /* PC relative 32 bit signed */
#define R_X86_64_GOT32          3       /* 32 bit GOT entry */
#define R_X86_64_PLT32          4       /* 32 bit PLT address */
#define R_X86_64_COPY           5       /* Copy symbol at runtime */
#define R_X86_64_GLOB_DAT       6       /* Create GOT entry */
#define R_X86_64_JUMP_SLOT      7       /* Create PLT entry */
#define R_X86_64_RELATIVE       8       /* Adjust by program base */
#define R_X86_64_GOTPCREL       9       /* 32 bit signed pc relative offset
                                           to GOT */
#define R_X86_64_32             10      /* Direct 32 bit zero extended */
#define R_X86_64_32S            11      /* Direct 32 bit sign extended */
#define R_X86_64_16             12      /* Direct 16 bit zero extended */
#define R_X86_64_PC16           13      /* 16 bit sign extended pc relative */
#define R_X86_64_8              14      /* Direct 8 bit sign extended */
#define R_X86_64_PC8            15      /* 8 bit sign extended pc relative */

#define R_X86_64_NUM            16
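/*
 * Illustrative sketch, not part of the header above: the r_info word of an
 * Elf64_Rela packs a symbol index together with one of the R_X86_64_* type
 * values above; the standard ELF64_R_SYM()/ELF64_R_TYPE() macros from
 * <elf.h> split it.  describe_reloc() is a hypothetical helper covering
 * two common dynamic relocation types.
 */
#include <elf.h>
#include <stdio.h>

static void describe_reloc(const Elf64_Rela *r)
{
        switch (ELF64_R_TYPE(r->r_info)) {
        case R_X86_64_RELATIVE:
                /* No symbol: target value is load base + addend. */
                printf("RELATIVE  addend=%ld\n", (long)r->r_addend);
                break;
        case R_X86_64_GLOB_DAT:
                printf("GLOB_DAT  symbol=%lu\n",
                       (unsigned long)ELF64_R_SYM(r->r_info));
                break;
        default:
                printf("type %lu\n", (unsigned long)ELF64_R_TYPE(r->r_info));
        }
}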
/* Legal values for e_flags field of Elf64_Ehdr. */
#define EF_ALPHA_32BIT          1       /* All addresses are below 2GB */

/* HPPA specific definitions. */

/* Legal values for e_flags field of Elf32_Ehdr. */
#define EF_PARISC_TRAPNIL       0x00010000 /* Trap nil pointer dereference. */
#define EF_PARISC_EXT           0x00020000 /* Program uses arch. extensions. */
#define EF_PARISC_LSB           0x00040000 /* Program expects little endian. */
#define EF_PARISC_WIDE          0x00080000 /* Program expects wide mode. */
#define EF_PARISC_NO_KABP       0x00100000 /* No kernel assisted branch
                                              prediction. */
#define EF_PARISC_LAZYSWAP      0x00400000 /* Allow lazy swapping. */
#define EF_PARISC_ARCH          0x0000ffff /* Architecture version. */

/* Defined values for `e_flags & EF_PARISC_ARCH' are: */
#define EFA_PARISC_1_0          0x020b  /* PA-RISC 1.0 big-endian. */
#define EFA_PARISC_1_1          0x0210  /* PA-RISC 1.1 big-endian. */
#define EFA_PARISC_2_0          0x0214  /* PA-RISC 2.0 big-endian. */

/* Additional section indices. */
#define SHN_PARISC_ANSI_COMMON  0xff00  /* Section for tentatively declared
                                           symbols in ANSI C. */
#define SHN_PARISC_HUGE_COMMON  0xff01  /* Common blocks in huge model. */

/* Legal values for sh_type field of Elf32_Shdr. */
#define SHT_PARISC_EXT          0x70000000 /* Contains product specific ext. */
#define SHT_PARISC_UNWIND       0x70000001 /* Unwind information. */
#define SHT_PARISC_DOC          0x70000002 /* Debug info for optimized code. */

/* Legal values for sh_flags field of Elf32_Shdr. */
#define SHF_PARISC_SHORT        0x20000000 /* Section with short addressing. */
#define SHF_PARISC_HUGE         0x40000000 /* Section far from gp. */
#define SHF_PARISC_SBP          0x80000000 /* Static branch prediction code. */

/* Legal values for ST_TYPE subfield of st_info (symbol type). */
#define STT_PARISC_MILLICODE    13      /* Millicode function entry point. */

#define STT_HP_OPAQUE           (STT_LOOS + 0x1)
#define STT_HP_STUB             (STT_LOOS + 0x2)

/* HPPA relocs. */
#define R_PARISC_NONE           0       /* No reloc. */
#define R_PARISC_DIR32          1       /* Direct 32-bit reference. */
#define R_PARISC_DIR21L         2       /* Left 21 bits of eff. address. */
#define R_PARISC_DIR17R         3       /* Right 17 bits of eff. address. */
#define R_PARISC_DIR17F         4       /* 17 bits of eff. address. */
#define R_PARISC_DIR14R         6       /* Right 14 bits of eff. address. */
#define R_PARISC_PCREL32        9       /* 32-bit rel. address. */
#define R_PARISC_PCREL21L       10      /* Left 21 bits of rel. address. */
#define R_PARISC_PCREL17R       11      /* Right 17 bits of rel. address. */
#define R_PARISC_PCREL17F       12      /* 17 bits of rel. address. */
#define R_PARISC_PCREL14R       14      /* Right 14 bits of rel. address. */
#define R_PARISC_DPREL21L       18      /* Left 21 bits of rel. address. */
#define R_PARISC_DPREL14R       22      /* Right 14 bits of rel. address. */
#define R_PARISC_GPREL21L       26      /* GP-relative, left 21 bits. */
#define R_PARISC_GPREL14R       30      /* GP-relative, right 14 bits. */