diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 4 | ||||
-rw-r--r-- | kernel/acct.c | 2 | ||||
-rw-r--r-- | kernel/capability.c | 35 | ||||
-rw-r--r-- | kernel/cgroup.c | 6 | ||||
-rw-r--r-- | kernel/cred.c | 3 | ||||
-rw-r--r-- | kernel/exec_domain.c | 137 | ||||
-rw-r--r-- | kernel/exit.c | 2 | ||||
-rw-r--r-- | kernel/fork.c | 145 | ||||
-rw-r--r-- | kernel/gcov/base.c | 5 | ||||
-rw-r--r-- | kernel/groups.c | 3 | ||||
-rw-r--r-- | kernel/hung_task.c | 4 | ||||
-rw-r--r-- | kernel/pid.c | 15 | ||||
-rw-r--r-- | kernel/ptrace.c | 39 | ||||
-rw-r--r-- | kernel/reboot.c | 53 | ||||
-rw-r--r-- | kernel/resource.c | 32 | ||||
-rw-r--r-- | kernel/signal.c | 14 | ||||
-rw-r--r-- | kernel/sys.c | 49 | ||||
-rw-r--r-- | kernel/sys_ni.c | 14 | ||||
-rw-r--r-- | kernel/sysctl.c | 25 | ||||
-rw-r--r-- | kernel/trace/trace_stack.c | 4 |
20 files changed, 300 insertions, 291 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 1408b3353a3c..0f8f8b0bc1bf 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,7 +9,9 @@ obj-y = fork.o exec_domain.o panic.o \ extable.o params.o \ kthread.o sys_ni.o nsproxy.o \ notifier.o ksysfs.o cred.o reboot.o \ - async.o range.o groups.o smpboot.o + async.o range.o smpboot.o + +obj-$(CONFIG_MULTIUSER) += groups.o ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files diff --git a/kernel/acct.c b/kernel/acct.c index e6c10d1a4058..74963d192c5d 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -213,7 +213,7 @@ static int acct_on(struct filename *pathname) return -EACCES; } - if (!file->f_op->write) { + if (!(file->f_mode & FMODE_CAN_WRITE)) { kfree(acct); filp_close(file, NULL); return -EIO; diff --git a/kernel/capability.c b/kernel/capability.c index 989f5bfc57dc..45432b54d5c6 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -35,6 +35,7 @@ static int __init file_caps_disable(char *str) } __setup("no_file_caps", file_caps_disable); +#ifdef CONFIG_MULTIUSER /* * More recent versions of libcap are available from: * @@ -386,6 +387,24 @@ bool ns_capable(struct user_namespace *ns, int cap) } EXPORT_SYMBOL(ns_capable); + +/** + * capable - Determine if the current task has a superior capability in effect + * @cap: The capability to be tested for + * + * Return true if the current task has the given superior capability currently + * available for use, false if not. + * + * This sets PF_SUPERPRIV on the task if the capability is available on the + * assumption that it's about to be used. + */ +bool capable(int cap) +{ + return ns_capable(&init_user_ns, cap); +} +EXPORT_SYMBOL(capable); +#endif /* CONFIG_MULTIUSER */ + /** * file_ns_capable - Determine if the file's opener had a capability in effect * @file: The file we want to check @@ -412,22 +431,6 @@ bool file_ns_capable(const struct file *file, struct user_namespace *ns, EXPORT_SYMBOL(file_ns_capable); /** - * capable - Determine if the current task has a superior capability in effect - * @cap: The capability to be tested for - * - * Return true if the current task has the given superior capability currently - * available for use, false if not. - * - * This sets PF_SUPERPRIV on the task if the capability is available on the - * assumption that it's about to be used. - */ -bool capable(int cap) -{ - return ns_capable(&init_user_ns, cap); -} -EXPORT_SYMBOL(capable); - -/** * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped * @inode: The inode in question * @cap: The capability in question diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a220fdb66568..469dd547770c 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4196,7 +4196,9 @@ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) static int cgroup_pidlist_show(struct seq_file *s, void *v) { - return seq_printf(s, "%d\n", *(int *)v); + seq_printf(s, "%d\n", *(int *)v); + + return 0; } static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css, @@ -5451,7 +5453,7 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss) { WARN_ON_ONCE(!rcu_read_lock_held()); - return idr_find(&ss->css_idr, id); + return id > 0 ? idr_find(&ss->css_idr, id) : NULL; } #ifdef CONFIG_CGROUP_DEBUG diff --git a/kernel/cred.c b/kernel/cred.c index e0573a43c7df..ec1c07667ec1 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -29,6 +29,9 @@ static struct kmem_cache *cred_jar; +/* init to 2 - one for init_task, one to ensure it is never freed */ +struct group_info init_groups = { .usage = ATOMIC_INIT(2) }; + /* * The initial credentials for the initial task */ diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index 83d4382f5699..6873bb3e6b7e 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -20,145 +20,10 @@ #include <linux/types.h> #include <linux/fs_struct.h> - -static void default_handler(int, struct pt_regs *); - -static struct exec_domain *exec_domains = &default_exec_domain; -static DEFINE_RWLOCK(exec_domains_lock); - - -static unsigned long ident_map[32] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31 -}; - -struct exec_domain default_exec_domain = { - .name = "Linux", /* name */ - .handler = default_handler, /* lcall7 causes a seg fault. */ - .pers_low = 0, /* PER_LINUX personality. */ - .pers_high = 0, /* PER_LINUX personality. */ - .signal_map = ident_map, /* Identity map signals. */ - .signal_invmap = ident_map, /* - both ways. */ -}; - - -static void -default_handler(int segment, struct pt_regs *regp) -{ - set_personality(0); - - if (current_thread_info()->exec_domain->handler != default_handler) - current_thread_info()->exec_domain->handler(segment, regp); - else - send_sig(SIGSEGV, current, 1); -} - -static struct exec_domain * -lookup_exec_domain(unsigned int personality) -{ - unsigned int pers = personality(personality); - struct exec_domain *ep; - - read_lock(&exec_domains_lock); - for (ep = exec_domains; ep; ep = ep->next) { - if (pers >= ep->pers_low && pers <= ep->pers_high) - if (try_module_get(ep->module)) - goto out; - } - -#ifdef CONFIG_MODULES - read_unlock(&exec_domains_lock); - request_module("personality-%d", pers); - read_lock(&exec_domains_lock); - - for (ep = exec_domains; ep; ep = ep->next) { - if (pers >= ep->pers_low && pers <= ep->pers_high) - if (try_module_get(ep->module)) - goto out; - } -#endif - - ep = &default_exec_domain; -out: - read_unlock(&exec_domains_lock); - return ep; -} - -int -register_exec_domain(struct exec_domain *ep) -{ - struct exec_domain *tmp; - int err = -EBUSY; - - if (ep == NULL) - return -EINVAL; - - if (ep->next != NULL) - return -EBUSY; - - write_lock(&exec_domains_lock); - for (tmp = exec_domains; tmp; tmp = tmp->next) { - if (tmp == ep) - goto out; - } - - ep->next = exec_domains; - exec_domains = ep; - err = 0; - -out: - write_unlock(&exec_domains_lock); - return err; -} -EXPORT_SYMBOL(register_exec_domain); - -int -unregister_exec_domain(struct exec_domain *ep) -{ - struct exec_domain **epp; - - epp = &exec_domains; - write_lock(&exec_domains_lock); - for (epp = &exec_domains; *epp; epp = &(*epp)->next) { - if (ep == *epp) - goto unregister; - } - write_unlock(&exec_domains_lock); - return -EINVAL; - -unregister: - *epp = ep->next; - ep->next = NULL; - write_unlock(&exec_domains_lock); - return 0; -} -EXPORT_SYMBOL(unregister_exec_domain); - -int __set_personality(unsigned int personality) -{ - struct exec_domain *oep = current_thread_info()->exec_domain; - - current_thread_info()->exec_domain = lookup_exec_domain(personality); - current->personality = personality; - module_put(oep->module); - - return 0; -} -EXPORT_SYMBOL(__set_personality); - #ifdef CONFIG_PROC_FS static int execdomains_proc_show(struct seq_file *m, void *v) { - struct exec_domain *ep; - - read_lock(&exec_domains_lock); - for (ep = exec_domains; ep; ep = ep->next) - seq_printf(m, "%d-%d\t%-16s\t[%s]\n", - ep->pers_low, ep->pers_high, ep->name, - module_name(ep->module)); - read_unlock(&exec_domains_lock); + seq_puts(m, "0-0\tLinux \t[kernel]\n"); return 0; } diff --git a/kernel/exit.c b/kernel/exit.c index feff10bbb307..22fcc05dec40 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -756,8 +756,6 @@ void do_exit(long code) cgroup_exit(tsk); - module_put(task_thread_info(tsk)->exec_domain->module); - /* * FIXME: do that only when needed, using sched_exit tracepoint */ diff --git a/kernel/fork.c b/kernel/fork.c index cf65139615a0..03c1eaaa6ef5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -74,6 +74,7 @@ #include <linux/uprobes.h> #include <linux/aio.h> #include <linux/compiler.h> +#include <linux/sysctl.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -88,6 +89,16 @@ #include <trace/events/task.h> /* + * Minimum number of threads to boot the kernel + */ +#define MIN_THREADS 20 + +/* + * Maximum number of threads + */ +#define MAX_THREADS FUTEX_TID_MASK + +/* * Protected counters by write_lock_irq(&tasklist_lock) */ unsigned long total_forks; /* Handle normal Linux uptimes. */ @@ -253,7 +264,30 @@ EXPORT_SYMBOL_GPL(__put_task_struct); void __init __weak arch_task_cache_init(void) { } -void __init fork_init(unsigned long mempages) +/* + * set_max_threads + */ +static void set_max_threads(unsigned int max_threads_suggested) +{ + u64 threads; + + /* + * The number of threads shall be limited such that the thread + * structures may only consume a small part of the available memory. + */ + if (fls64(totalram_pages) + fls64(PAGE_SIZE) > 64) + threads = MAX_THREADS; + else + threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE, + (u64) THREAD_SIZE * 8UL); + + if (threads > max_threads_suggested) + threads = max_threads_suggested; + + max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS); +} + +void __init fork_init(void) { #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR #ifndef ARCH_MIN_TASKALIGN @@ -268,18 +302,7 @@ void __init fork_init(unsigned long mempages) /* do the arch specific task caches init */ arch_task_cache_init(); - /* - * The default maximum number of threads is set to a safe - * value: the thread structures can take up at most half - * of memory. - */ - max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE); - - /* - * we need to allow at least 20 threads to boot a system - */ - if (max_threads < 20) - max_threads = 20; + set_max_threads(MAX_THREADS); init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; @@ -380,6 +403,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) */ down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING); + /* No ordering required: file already has been exposed. */ + RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); + mm->total_vm = oldmm->total_vm; mm->shared_vm = oldmm->shared_vm; mm->exec_vm = oldmm->exec_vm; @@ -505,7 +531,13 @@ static inline void mm_free_pgd(struct mm_struct *mm) pgd_free(mm, mm->pgd); } #else -#define dup_mmap(mm, oldmm) (0) +static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) +{ + down_write(&oldmm->mmap_sem); + RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); + up_write(&oldmm->mmap_sem); + return 0; +} #define mm_alloc_pgd(mm) (0) #define mm_free_pgd(mm) #endif /* CONFIG_MMU */ @@ -674,34 +706,53 @@ void mmput(struct mm_struct *mm) } EXPORT_SYMBOL_GPL(mmput); +/** + * set_mm_exe_file - change a reference to the mm's executable file + * + * This changes mm's executable file (shown as symlink /proc/[pid]/exe). + * + * Main users are mmput() and sys_execve(). Callers prevent concurrent + * invocations: in mmput() nobody alive left, in execve task is single + * threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the + * mm->exe_file, but does so without using set_mm_exe_file() in order + * to do avoid the need for any locks. + */ void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) { + struct file *old_exe_file; + + /* + * It is safe to dereference the exe_file without RCU as + * this function is only called if nobody else can access + * this mm -- see comment above for justification. + */ + old_exe_file = rcu_dereference_raw(mm->exe_file); + if (new_exe_file) get_file(new_exe_file); - if (mm->exe_file) - fput(mm->exe_file); - mm->exe_file = new_exe_file; + rcu_assign_pointer(mm->exe_file, new_exe_file); + if (old_exe_file) + fput(old_exe_file); } +/** + * get_mm_exe_file - acquire a reference to the mm's executable file + * + * Returns %NULL if mm has no associated executable file. + * User must release file via fput(). + */ struct file *get_mm_exe_file(struct mm_struct *mm) { struct file *exe_file; - /* We need mmap_sem to protect against races with removal of exe_file */ - down_read(&mm->mmap_sem); - exe_file = mm->exe_file; - if (exe_file) - get_file(exe_file); - up_read(&mm->mmap_sem); + rcu_read_lock(); + exe_file = rcu_dereference(mm->exe_file); + if (exe_file && !get_file_rcu(exe_file)) + exe_file = NULL; + rcu_read_unlock(); return exe_file; } - -static void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) -{ - /* It's safe to write the exe_file pointer without exe_file_lock because - * this is called during fork when the task is not yet in /proc */ - newmm->exe_file = get_mm_exe_file(oldmm); -} +EXPORT_SYMBOL(get_mm_exe_file); /** * get_task_mm - acquire a reference to the task's mm @@ -864,8 +915,6 @@ static struct mm_struct *dup_mm(struct task_struct *tsk) if (!mm_init(mm, tsk)) goto fail_nomem; - dup_mm_exe_file(oldmm, mm); - err = dup_mmap(mm, oldmm); if (err) goto free_pt; @@ -1279,9 +1328,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (nr_threads >= max_threads) goto bad_fork_cleanup_count; - if (!try_module_get(task_thread_info(p)->exec_domain->module)) - goto bad_fork_cleanup_count; - delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER); p->flags |= PF_FORKNOEXEC; @@ -1406,10 +1452,11 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto bad_fork_cleanup_io; if (pid != &init_struct_pid) { - retval = -ENOMEM; pid = alloc_pid(p->nsproxy->pid_ns_for_children); - if (!pid) + if (IS_ERR(pid)) { + retval = PTR_ERR(pid); goto bad_fork_cleanup_io; + } } p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; @@ -1590,7 +1637,6 @@ bad_fork_cleanup_threadgroup_lock: if (clone_flags & CLONE_THREAD) threadgroup_change_end(current); delayacct_tsk_free(p); - module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); exit_creds(p); @@ -2004,3 +2050,26 @@ int unshare_files(struct files_struct **displaced) task_unlock(task); return 0; } + +int sysctl_max_threads(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table t; + int ret; + int threads = max_threads; + int min = MIN_THREADS; + int max = MAX_THREADS; + + t = *table; + t.data = &threads; + t.extra1 = &min; + t.extra2 = &max; + + ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); + if (ret || !write) + return ret; + + set_max_threads(threads); + + return 0; +} diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c index b358a802fd18..a744098e4eb7 100644 --- a/kernel/gcov/base.c +++ b/kernel/gcov/base.c @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mutex.h> +#include <linux/sched.h> #include "gcov.h" static int gcov_events_enabled; @@ -107,8 +108,10 @@ void gcov_enable_events(void) gcov_events_enabled = 1; /* Perform event callback for previously registered entries. */ - while ((info = gcov_info_next(info))) + while ((info = gcov_info_next(info))) { gcov_event(GCOV_ADD, info); + cond_resched(); + } mutex_unlock(&gcov_lock); } diff --git a/kernel/groups.c b/kernel/groups.c index 664411f171b5..74d431d25251 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -9,9 +9,6 @@ #include <linux/user_namespace.h> #include <asm/uaccess.h> -/* init to 2 - one for init_task, one to ensure it is never freed */ -struct group_info init_groups = { .usage = ATOMIC_INIT(2) }; - struct group_info *groups_alloc(int gidsetsize) { struct group_info *group_info; diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 06db12434d72..e0f90c2b57aa 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -169,7 +169,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) return; rcu_read_lock(); - do_each_thread(g, t) { + for_each_process_thread(g, t) { if (!max_count--) goto unlock; if (!--batch_count) { @@ -180,7 +180,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ if (t->state == TASK_UNINTERRUPTIBLE) check_hung_task(t, timeout); - } while_each_thread(g, t); + } unlock: rcu_read_unlock(); } diff --git a/kernel/pid.c b/kernel/pid.c index cd36a5e0d173..4fd07d5b7baf 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -182,7 +182,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns) spin_unlock_irq(&pidmap_lock); kfree(page); if (unlikely(!map->page)) - break; + return -ENOMEM; } if (likely(atomic_read(&map->nr_free))) { for ( ; ; ) { @@ -210,7 +210,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns) } pid = mk_pid(pid_ns, map, offset); } - return -1; + return -EAGAIN; } int next_pidmap(struct pid_namespace *pid_ns, unsigned int last) @@ -301,17 +301,20 @@ struct pid *alloc_pid(struct pid_namespace *ns) int i, nr; struct pid_namespace *tmp; struct upid *upid; + int retval = -ENOMEM; pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL); if (!pid) - goto out; + return ERR_PTR(retval); tmp = ns; pid->level = ns->level; for (i = ns->level; i >= 0; i--) { nr = alloc_pidmap(tmp); - if (nr < 0) + if (IS_ERR_VALUE(nr)) { + retval = nr; goto out_free; + } pid->numbers[i].nr = nr; pid->numbers[i].ns = tmp; @@ -339,7 +342,6 @@ struct pid *alloc_pid(struct pid_namespace *ns) } spin_unlock_irq(&pidmap_lock); -out: return pid; out_unlock: @@ -351,8 +353,7 @@ out_free: free_pidmap(pid->numbers + i); kmem_cache_free(ns->pid_cachep, pid); - pid = NULL; - goto out; + return ERR_PTR(retval); } void disable_pid_allocation(struct pid_namespace *ns) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 227fec36b12a..c8e0e050a36a 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -456,8 +456,6 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p) static int ptrace_detach(struct task_struct *child, unsigned int data) { - bool dead = false; - if (!valid_signal(data)) return -EIO; @@ -467,18 +465,19 @@ static int ptrace_detach(struct task_struct *child, unsigned int data) write_lock_irq(&tasklist_lock); /* - * This child can be already killed. Make sure de_thread() or - * our sub-thread doing do_wait() didn't do release_task() yet. + * We rely on ptrace_freeze_traced(). It can't be killed and + * untraced by another thread, it can't be a zombie. */ - if (child->ptrace) { - child->exit_code = data; - dead = __ptrace_detach(current, child); - } + WARN_ON(!child->ptrace || child->exit_state); + /* + * tasklist_lock avoids the race with wait_task_stopped(), see + * the comment in ptrace_resume(). + */ + child->exit_code = data; + __ptrace_detach(current, child); write_unlock_irq(&tasklist_lock); proc_ptrace_connector(child, PTRACE_DETACH); - if (unlikely(dead)) - release_task(child); return 0; } @@ -697,6 +696,8 @@ static int ptrace_peek_siginfo(struct task_struct *child, static int ptrace_resume(struct task_struct *child, long request, unsigned long data) { + bool need_siglock; + if (!valid_signal(data)) return -EIO; @@ -724,8 +725,26 @@ static int ptrace_resume(struct task_struct *child, long request, user_disable_single_step(child); } + /* + * Change ->exit_code and ->state under siglock to avoid the race + * with wait_task_stopped() in between; a non-zero ->exit_code will + * wrongly look like another report from tracee. + * + * Note that we need siglock even if ->exit_code == data and/or this + * status was not reported yet, the new status must not be cleared by + * wait_task_stopped() after resume. + * + * If data == 0 we do not care if wait_task_stopped() reports the old + * status and clears the code too; this can't race with the tracee, it + * takes siglock after resume. + */ + need_siglock = data && !thread_group_empty(current); + if (need_siglock) + spin_lock_irq(&child->sighand->siglock); child->exit_code = data; wake_up_state(child, __TASK_TRACED); + if (need_siglock) + spin_unlock_irq(&child->sighand->siglock); return 0; } diff --git a/kernel/reboot.c b/kernel/reboot.c index 5925f5ae8dff..d20c85d9f8c0 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -387,8 +387,9 @@ void ctrl_alt_del(void) } char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; +static const char reboot_cmd[] = "/sbin/reboot"; -static int __orderly_poweroff(bool force) +static int run_cmd(const char *cmd) { char **argv; static char *envp[] = { @@ -397,8 +398,7 @@ static int __orderly_poweroff(bool force) NULL }; int ret; - - argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL); + argv = argv_split(GFP_KERNEL, cmd, NULL); if (argv) { ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); argv_free(argv); @@ -406,8 +406,33 @@ static int __orderly_poweroff(bool force) ret = -ENOMEM; } + return ret; +} + +static int __orderly_reboot(void) +{ + int ret; + + ret = run_cmd(reboot_cmd); + + if (ret) { + pr_warn("Failed to start orderly reboot: forcing the issue\n"); + emergency_sync(); + kernel_restart(NULL); + } + + return ret; +} + +static int __orderly_poweroff(bool force) +{ + int ret; + + ret = run_cmd(poweroff_cmd); + if (ret && force) { pr_warn("Failed to start orderly shutdown: forcing the issue\n"); + /* * I guess this should try to kick off some daemon to sync and * poweroff asap. Or not even bother syncing if we're doing an @@ -436,15 +461,33 @@ static DECLARE_WORK(poweroff_work, poweroff_work_func); * This may be called from any context to trigger a system shutdown. * If the orderly shutdown fails, it will force an immediate shutdown. */ -int orderly_poweroff(bool force) +void orderly_poweroff(bool force) { if (force) /* do not override the pending "true" */ poweroff_force = true; schedule_work(&poweroff_work); - return 0; } EXPORT_SYMBOL_GPL(orderly_poweroff); +static void reboot_work_func(struct work_struct *work) +{ + __orderly_reboot(); +} + +static DECLARE_WORK(reboot_work, reboot_work_func); + +/** + * orderly_reboot - Trigger an orderly system reboot + * + * This may be called from any context to trigger a system reboot. + * If the orderly reboot fails, it will force an immediate reboot. + */ +void orderly_reboot(void) +{ + schedule_work(&reboot_work); +} +EXPORT_SYMBOL_GPL(orderly_reboot); + static int __init reboot_setup(char *str) { for (;;) { diff --git a/kernel/resource.c b/kernel/resource.c index 19f2357dfda3..90552aab5f2d 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1034,8 +1034,6 @@ resource_size_t resource_alignment(struct resource *res) * * request_region creates a new busy region. * - * check_region returns non-zero if the area is already busy. - * * release_region releases a matching busy region. */ @@ -1098,36 +1096,6 @@ struct resource * __request_region(struct resource *parent, EXPORT_SYMBOL(__request_region); /** - * __check_region - check if a resource region is busy or free - * @parent: parent resource descriptor - * @start: resource start address - * @n: resource region size - * - * Returns 0 if the region is free at the moment it is checked, - * returns %-EBUSY if the region is busy. - * - * NOTE: - * This function is deprecated because its use is racy. - * Even if it returns 0, a subsequent call to request_region() - * may fail because another driver etc. just allocated the region. - * Do NOT use it. It will be removed from the kernel. - */ -int __check_region(struct resource *parent, resource_size_t start, - resource_size_t n) -{ - struct resource * res; - - res = __request_region(parent, start, n, "check-region", 0); - if (!res) - return -EBUSY; - - release_resource(res); - free_resource(res); - return 0; -} -EXPORT_SYMBOL(__check_region); - -/** * __release_region - release a previously reserved resource region * @parent: parent resource descriptor * @start: resource start address diff --git a/kernel/signal.c b/kernel/signal.c index a390499943e4..d51c5ddd855c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2992,11 +2992,9 @@ static int do_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t *info) * Nor can they impersonate a kill()/tgkill(), which adds source info. */ if ((info->si_code >= 0 || info->si_code == SI_TKILL) && - (task_pid_vnr(current) != pid)) { - /* We used to allow any < 0 si_code */ - WARN_ON_ONCE(info->si_code < 0); + (task_pid_vnr(current) != pid)) return -EPERM; - } + info->si_signo = sig; /* POSIX.1b doesn't mention process groups. */ @@ -3041,12 +3039,10 @@ static int do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info) /* Not even root can pretend to send signals from the kernel. * Nor can they impersonate a kill()/tgkill(), which adds source info. */ - if (((info->si_code >= 0 || info->si_code == SI_TKILL)) && - (task_pid_vnr(current) != pid)) { - /* We used to allow any < 0 si_code */ - WARN_ON_ONCE(info->si_code < 0); + if ((info->si_code >= 0 || info->si_code == SI_TKILL) && + (task_pid_vnr(current) != pid)) return -EPERM; - } + info->si_signo = sig; return do_send_specific(tgid, pid, sig, info); diff --git a/kernel/sys.c b/kernel/sys.c index a03d9cd23ed7..a4e372b798a5 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -325,6 +325,7 @@ out_unlock: * SMP: There are not races, the GIDs are checked only by filesystem * operations (as far as semantic preservation is concerned). */ +#ifdef CONFIG_MULTIUSER SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) { struct user_namespace *ns = current_user_ns(); @@ -815,6 +816,7 @@ change_okay: commit_creds(new); return old_fsgid; } +#endif /* CONFIG_MULTIUSER */ /** * sys_getpid - return the thread group id of the current process @@ -1647,14 +1649,13 @@ SYSCALL_DEFINE1(umask, int, mask) return mask; } -static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd) +static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) { struct fd exe; + struct file *old_exe, *exe_file; struct inode *inode; int err; - VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm); - exe = fdget(fd); if (!exe.file) return -EBADF; @@ -1678,15 +1679,22 @@ static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd) /* * Forbid mm->exe_file change if old file still mapped. */ + exe_file = get_mm_exe_file(mm); err = -EBUSY; - if (mm->exe_file) { + if (exe_file) { struct vm_area_struct *vma; - for (vma = mm->mmap; vma; vma = vma->vm_next) - if (vma->vm_file && - path_equal(&vma->vm_file->f_path, - &mm->exe_file->f_path)) - goto exit; + down_read(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (!vma->vm_file) + continue; + if (path_equal(&vma->vm_file->f_path, + &exe_file->f_path)) + goto exit_err; + } + + up_read(&mm->mmap_sem); + fput(exe_file); } /* @@ -1700,10 +1708,18 @@ static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd) goto exit; err = 0; - set_mm_exe_file(mm, exe.file); /* this grabs a reference to exe.file */ + /* set the new file, lockless */ + get_file(exe.file); + old_exe = xchg(&mm->exe_file, exe.file); + if (old_exe) + fput(old_exe); exit: fdput(exe); return err; +exit_err: + up_read(&mm->mmap_sem); + fput(exe_file); + goto exit; } #ifdef CONFIG_CHECKPOINT_RESTORE @@ -1838,10 +1854,9 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL; } - down_write(&mm->mmap_sem); if (prctl_map.exe_fd != (u32)-1) - error = prctl_set_mm_exe_file_locked(mm, prctl_map.exe_fd); - downgrade_write(&mm->mmap_sem); + error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd); + down_read(&mm->mmap_sem); if (error) goto out; @@ -1907,12 +1922,8 @@ static int prctl_set_mm(int opt, unsigned long addr, if (!capable(CAP_SYS_RESOURCE)) return -EPERM; - if (opt == PR_SET_MM_EXE_FILE) { - down_write(&mm->mmap_sem); - error = prctl_set_mm_exe_file_locked(mm, (unsigned int)addr); - up_write(&mm->mmap_sem); - return error; - } + if (opt == PR_SET_MM_EXE_FILE) + return prctl_set_mm_exe_file(mm, (unsigned int)addr); if (addr >= TASK_SIZE || addr < mmap_min_addr) return -EINVAL; diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 5adcb0ae3a58..7995ef5868d8 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -159,6 +159,20 @@ cond_syscall(sys_uselib); cond_syscall(sys_fadvise64); cond_syscall(sys_fadvise64_64); cond_syscall(sys_madvise); +cond_syscall(sys_setuid); +cond_syscall(sys_setregid); +cond_syscall(sys_setgid); +cond_syscall(sys_setreuid); +cond_syscall(sys_setresuid); +cond_syscall(sys_getresuid); +cond_syscall(sys_setresgid); +cond_syscall(sys_getresgid); +cond_syscall(sys_setgroups); +cond_syscall(sys_getgroups); +cond_syscall(sys_setfsuid); +cond_syscall(sys_setfsgid); +cond_syscall(sys_capget); +cond_syscall(sys_capset); /* arch-specific weak syscall entries */ cond_syscall(sys_pciconfig_read); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8c0eabd41886..2082b1a88fb9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -93,11 +93,9 @@ #include <linux/nmi.h> #endif - #if defined(CONFIG_SYSCTL) /* External variables not in a header file. */ -extern int max_threads; extern int suid_dumpable; #ifdef CONFIG_COREDUMP extern int core_uses_pid; @@ -710,10 +708,10 @@ static struct ctl_table kern_table[] = { #endif { .procname = "threads-max", - .data = &max_threads, + .data = NULL, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = sysctl_max_threads, }, { .procname = "random", @@ -1335,6 +1333,15 @@ static struct ctl_table vm_table[] = { .extra1 = &min_extfrag_threshold, .extra2 = &max_extfrag_threshold, }, + { + .procname = "compact_unevictable_allowed", + .data = &sysctl_compact_unevictable_allowed, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &one, + }, #endif /* CONFIG_COMPACTION */ { @@ -1974,7 +1981,15 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, int write, void *data) { if (write) { - *valp = *negp ? -*lvalp : *lvalp; + if (*negp) { + if (*lvalp > (unsigned long) INT_MAX + 1) + return -EINVAL; + *valp = -*lvalp; + } else { + if (*lvalp > (unsigned long) INT_MAX) + return -EINVAL; + *valp = *lvalp; + } } else { int val = *valp; if (val < 0) { diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index c3e4fcfddd45..3f34496244e9 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -327,11 +327,11 @@ static void t_stop(struct seq_file *m, void *p) local_irq_enable(); } -static int trace_lookup_stack(struct seq_file *m, long i) +static void trace_lookup_stack(struct seq_file *m, long i) { unsigned long addr = stack_dump_trace[i]; - return seq_printf(m, "%pS\n", (void *)addr); + seq_printf(m, "%pS\n", (void *)addr); } static void print_disabled(struct seq_file *m) |