git.neil.brown.name Git - history.git/commitdiff
[PATCH] O(1) sys_exit(), threading, scalable-exit-2.5.31-B4
author Ingo Molnar <mingo@elte.hu>
Mon, 19 Aug 2002 06:07:20 +0000 (23:07 -0700)
committer Ingo Molnar <mingo@elte.hu>
Mon, 19 Aug 2002 06:07:20 +0000 (23:07 -0700)
the attached patch updates a number of items:

 - adds cleanups suggested by Christoph Hellwig: needed unlikely()
   statements, a superfluous #define and line length problems.

 - splits up the global ptrace list into per-task ptrace lists. This was
   pretty straightforward, and this makes the worst-case exit() latency
   O(nr_children).

the per-task ptrace lists unearthed a bug that the previous code did not
take care of: tasks on the ptrace list have to be correctly reparented
too. This patch passed my stress tests as well.

17 files changed:
arch/i386/kernel/i8259.c
arch/i386/kernel/irq.c
arch/i386/kernel/process.c
arch/i386/kernel/signal.c
arch/i386/kernel/traps.c
arch/i386/kernel/vm86.c
include/asm-i386/smp.h
include/asm-i386/user.h
include/linux/binfmts.h
include/linux/elfcore.h
include/linux/init_task.h
include/linux/mm.h
include/linux/ptrace.h
include/linux/sched.h
kernel/exit.c
kernel/fork.c
kernel/ptrace.c

index 4f7a5472eb1c096d092e5e5324a1b1186b20780c..587cf1ec6db5bf6de875b16001d5e940ec614309 100644 (file)
@@ -1,5 +1,4 @@
 #include <linux/config.h>
-#include <linux/ptrace.h>
 #include <linux/errno.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
index 039ba8a226a49ed0afcaa22c115a8bf23e1d11d6..b283b5c0c8b1a798baeceb7e724330cc29073dff 100644 (file)
@@ -18,7 +18,6 @@
  */
 
 #include <linux/config.h>
-#include <linux/ptrace.h>
 #include <linux/errno.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
index ce8e788e240cbcd41bdd2e3242c24555df3b7d50..869d2533ae0b77d99e1344215c44e99845808630 100644 (file)
@@ -23,7 +23,6 @@
 #include <linux/smp_lock.h>
 #include <linux/stddef.h>
 #include <linux/unistd.h>
-#include <linux/ptrace.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/user.h>
index e350d19a3528dcb6748072cda818ea373823f0f5..e5d6e9ffc5efe25dedfc8f8d593ad2080db47b88 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/signal.h>
 #include <linux/errno.h>
 #include <linux/wait.h>
-#include <linux/ptrace.h>
 #include <linux/unistd.h>
 #include <linux/stddef.h>
 #include <linux/personality.h>
index 4338f4c60fa9ca76eef28613c19c4d62dd073a73..7a6ae732bfc98bf1c5080f38a698d5338b5528cf 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/errno.h>
-#include <linux/ptrace.h>
 #include <linux/timer.h>
 #include <linux/mm.h>
 #include <linux/init.h>
index 7c60b661f713b7fec24966fdb0f728e9ced95adf..90b273f1cb5b6a5d0ea1ad6482278a43cdbce2ed 100644 (file)
@@ -8,7 +8,6 @@
 #include <linux/kernel.h>
 #include <linux/signal.h>
 #include <linux/string.h>
-#include <linux/ptrace.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
index a6d46dec1d098996062cc426bf6d755439825a84..a4420f576dcf895c7ccb38d624faa6af520adc3c 100644 (file)
@@ -7,7 +7,6 @@
 #ifndef __ASSEMBLY__
 #include <linux/config.h>
 #include <linux/threads.h>
-#include <linux/ptrace.h>
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
index ddc06ea014a6bc59c3045e42f12898144477a91f..0e85d2a5e33a82f01d87743523a29cf3e327632b 100644 (file)
@@ -2,7 +2,6 @@
 #define _I386_USER_H
 
 #include <asm/page.h>
-#include <linux/ptrace.h>
 /* Core file format: The core file is written in such a way that gdb
    can understand it and provide useful information to the user (under
    linux we use the 'trad-core' bfd).  There are quite a number of
index 314addb2329d6c6753edb6ba0f31a8c3f57a32a5..dfea0f47ed3e1629cbb24cb7273554c0fadd4b19 100644 (file)
@@ -1,7 +1,6 @@
 #ifndef _LINUX_BINFMTS_H
 #define _LINUX_BINFMTS_H
 
-#include <linux/ptrace.h>
 #include <linux/capability.h>
 
 /*
index 3129abe828fc9968b427d731a2f8d0534da6cb48..ebf16207732d9fbfb965bb953b849be0d3d0b9ad 100644 (file)
@@ -4,7 +4,6 @@
 #include <linux/types.h>
 #include <linux/signal.h>
 #include <linux/time.h>
-#include <linux/ptrace.h>
 #include <linux/user.h>
 
 struct elf_siginfo
index d023d2360a715fe3fe1ed467965af791e576f8e6..80a57914bccc049453f35bf3562ce5225bd74b24 100644 (file)
@@ -54,6 +54,8 @@
        .run_list       = LIST_HEAD_INIT(tsk.run_list),                 \
        .time_slice     = HZ,                                           \
        .tasks          = LIST_HEAD_INIT(tsk.tasks),                    \
+       .ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children),          \
+       .ptrace_list    = LIST_HEAD_INIT(tsk.ptrace_list),              \
        .real_parent    = &tsk,                                         \
        .parent         = &tsk,                                         \
        .children       = LIST_HEAD_INIT(tsk.children),                 \
index c4395b9fe950028ef4e5d0a5b4be6465797b6fa6..d731c02741b954ebf7adb88a3022f96831ca85f3 100644 (file)
@@ -354,12 +354,6 @@ extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned
 extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
 extern int make_pages_present(unsigned long addr, unsigned long end);
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
-extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len);
-extern int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long dst, int len);
-extern int ptrace_attach(struct task_struct *tsk);
-extern int ptrace_detach(struct task_struct *, unsigned int);
-extern void ptrace_disable(struct task_struct *);
-extern int ptrace_check_attach(struct task_struct *task, int kill);
 
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
                int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
index 0a02879d546fd9f60ddc137dced9718cc8419d6c..78512fa922b986a935d1878e945d122a3a479fe6 100644 (file)
@@ -3,6 +3,8 @@
 /* ptrace.h */
 /* structs and defines to help the user use the ptrace system call. */
 
+#include <linux/compiler.h>
+
 /* has the defines to get at the registers. */
 
 #define PTRACE_TRACEME            0
 
 #include <asm/ptrace.h>
 
+extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len);
+extern int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long dst, int len);
+extern int ptrace_attach(struct task_struct *tsk);
+extern int ptrace_detach(struct task_struct *, unsigned int);
+extern void ptrace_disable(struct task_struct *);
+extern int ptrace_check_attach(struct task_struct *task, int kill);
+extern void __ptrace_link(struct task_struct *child,
+                               struct task_struct *new_parent);
+extern void __ptrace_unlink(struct task_struct *child);
+
+/*
+ * Link 'child' under 'new_parent' through __ptrace_link(), but only
+ * when child->ptrace is non-zero; for an untraced child this is a no-op.
+ * The caller is expected to hold the tasklist lock for writing, as
+ * __ptrace_link() requires.
+ */
+static inline void ptrace_link(struct task_struct *child,
+                               struct task_struct *new_parent)
+{
+       /* the traced case is annotated as the unlikely one */
+       if (unlikely(child->ptrace))
+               __ptrace_link(child, new_parent);
+}
+/*
+ * Undo the ptrace link of 'child' through __ptrace_unlink(), but only
+ * when child->ptrace is non-zero; for an untraced child this is a no-op.
+ * The caller is expected to hold the tasklist lock for writing, as
+ * __ptrace_unlink() requires.
+ */
+static inline void ptrace_unlink(struct task_struct *child)
+{
+       /* the traced case is annotated as the unlikely one */
+       if (unlikely(child->ptrace))
+               __ptrace_unlink(child);
+}
+
 #endif
index 5713927c300a22075adb63ee3edd5b9a47064c15..5afeecb164b946adeeba957540647d02ed25a97b 100644 (file)
@@ -270,6 +270,8 @@ struct task_struct {
        unsigned int time_slice, first_time_slice;
 
        struct list_head tasks;
+       struct list_head ptrace_children;
+       struct list_head ptrace_list;
 
        struct mm_struct *mm, *active_mm;
        struct list_head local_pages;
index 8c51bf9c8aee789f98496ce2e3e1d571c36c4cb6..f2390db88ab6a921ab8d16954d9b5205c5e5303c 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/acct.h>
 #include <linux/file.h>
 #include <linux/binfmts.h>
+#include <linux/ptrace.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -65,6 +66,8 @@ static void release_task(struct task_struct * p)
        atomic_dec(&p->user->processes);
        security_ops->task_free_security(p);
        free_uid(p->user);
+       BUG_ON(p->ptrace || !list_empty(&p->ptrace_list) ||
+                                       !list_empty(&p->ptrace_children));
        unhash_process(p);
 
        release_thread(p);
@@ -177,6 +180,7 @@ void reparent_to_init(void)
 {
        write_lock_irq(&tasklist_lock);
 
+       ptrace_unlink(current);
        /* Reparent to init */
        REMOVE_LINKS(current);
        current->parent = child_reaper;
@@ -231,45 +235,20 @@ void daemonize(void)
        atomic_inc(&current->files->count);
 }
 
-/*
- * When we die, we re-parent all our children.
- * Try to give them to another thread in our thread
- * group, and if no such member exists, give it to
- * the global child reaper process (ie "init")
- */
-static inline void forget_original_parent(struct task_struct * father)
+static void reparent_thread(task_t *p, task_t *reaper, task_t *child_reaper)
 {
-       struct task_struct * p, *reaper;
-
-       read_lock(&tasklist_lock);
-
-       /* Next in our thread group, if they're not already exiting */
-       reaper = father;
-       do {
-               reaper = next_thread(reaper);
-               if (!(reaper->flags & PF_EXITING))
-                       break;
-       } while (reaper != father);
-
-       if (reaper == father)
-               reaper = child_reaper;
-
-       for_each_task(p) {
-               if (p->real_parent == father) {
-                       /* We dont want people slaying init */
-                       p->exit_signal = SIGCHLD;
-                       p->self_exec_id++;
-
-                       /* Make sure we're not reparenting to ourselves */
-                       if (p == reaper)
-                               p->real_parent = child_reaper;
-                       else
-                               p->real_parent = reaper;
-
-                       if (p->pdeath_signal) send_sig(p->pdeath_signal, p, 0);
-               }
-       }
-       read_unlock(&tasklist_lock);
+       /* We dont want people slaying init */
+       p->exit_signal = SIGCHLD;
+       p->self_exec_id++;
+
+       /* Make sure we're not reparenting to ourselves */
+       if (p == reaper)
+               p->real_parent = child_reaper;
+       else
+               p->real_parent = reaper;
+
+       if (p->pdeath_signal)
+               send_sig(p->pdeath_signal, p, 0);
 }
 
 static inline void close_files(struct files_struct * files)
@@ -419,13 +398,86 @@ void exit_mm(struct task_struct *tsk)
        __exit_mm(tsk);
 }
 
+/*
+ * When we die, we re-parent all our children.
+ * Try to give them to another thread in our thread
+ * group, and if no such member exists, give it to
+ * the global child reaper process (ie "init")
+ */
+static inline void forget_original_parent(struct task_struct * father)
+{
+       struct task_struct *p, *reaper;
+       list_t *_p;
+
+       read_lock(&tasklist_lock);
+
+       /* Next in our thread group, if they're not already exiting */
+       reaper = father;
+       do {
+               reaper = next_thread(reaper);
+               if (!(reaper->flags & PF_EXITING))
+                       break;
+       } while (reaper != father);
+
+       if (reaper == father)
+               reaper = child_reaper;
+
+       /*
+        * There are only two places where our children can be:
+        *
+        * - in our child list
+        * - in our per-task ptrace_children list
+        *
+        * Search them and reparent children.
+        */
+       list_for_each(_p, &father->children) {
+               p = list_entry(_p,struct task_struct,sibling);
+               reparent_thread(p, reaper, child_reaper);
+       }
+       list_for_each(_p, &father->ptrace_children) {
+               p = list_entry(_p,struct task_struct,ptrace_list);
+               reparent_thread(p, reaper, child_reaper);
+       }
+       read_unlock(&tasklist_lock);
+}
+
+/*
+ * Hand one child 'p' of the exiting task back to its real parent:
+ * undo any ptrace link, clear p->ptrace, requeue p at the tail of
+ * p->real_parent's child list, and notify that parent if p is already
+ * a zombie with an exit signal other than -1.
+ *
+ * Expects the tasklist lock to be write-held on entry.  The lock is
+ * dropped and re-taken around the orphaned-pgrp check, so the
+ * caller's child lists may change across this call.
+ *
+ * NOTE(review): 'father' is not used; the pgrp/session comparison
+ * reads 'current' instead.
+ */
+static inline void zap_thread(task_t *p, task_t *father)
+{
+       ptrace_unlink(p);
+       list_del_init(&p->sibling);
+       p->ptrace = 0;
+
+       p->parent = p->real_parent;
+       list_add_tail(&p->sibling, &p->parent->children);
+       if (p->state == TASK_ZOMBIE && p->exit_signal != -1)
+               do_notify_parent(p, p->exit_signal);
+       /*
+        * process group orphan check
+        * Case ii: Our child is in a different pgrp
+        * than we are, and it was the only connection
+        * outside, so the child pgrp is now orphaned.
+        */
+       if ((p->pgrp != current->pgrp) &&
+           (p->session == current->session)) {
+               /* p->pgrp is copied before the lock is released */
+               int pgrp = p->pgrp;
+
+               write_unlock_irq(&tasklist_lock);
+               if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) {
+                       kill_pg(pgrp,SIGHUP,1);
+                       kill_pg(pgrp,SIGCONT,1);
+               }
+               write_lock_irq(&tasklist_lock);
+       }
+}
+
 /*
  * Send signals to all our closest relatives so that they know
  * to properly mourn us..
  */
 static void exit_notify(void)
 {
-       struct task_struct * p, *t;
+       struct task_struct *t;
+       list_t *_p, *_n;
 
        forget_original_parent(current);
        /*
@@ -484,33 +536,20 @@ static void exit_notify(void)
        current->state = TASK_ZOMBIE;
        if (current->exit_signal != -1)
                do_notify_parent(current, current->exit_signal);
-       while ((p = eldest_child(current))) {
-               list_del_init(&p->sibling);
-               p->ptrace = 0;
-
-               p->parent = p->real_parent;
-               list_add_tail(&p->sibling,&p->parent->children);
-               if (p->state == TASK_ZOMBIE && p->exit_signal != -1)
-                       do_notify_parent(p, p->exit_signal);
-               /*
-                * process group orphan check
-                * Case ii: Our child is in a different pgrp
-                * than we are, and it was the only connection
-                * outside, so the child pgrp is now orphaned.
-                */
-               if ((p->pgrp != current->pgrp) &&
-                   (p->session == current->session)) {
-                       int pgrp = p->pgrp;
-
-                       write_unlock_irq(&tasklist_lock);
-                       if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) {
-                               kill_pg(pgrp,SIGHUP,1);
-                               kill_pg(pgrp,SIGCONT,1);
-                       }
-                       write_lock_irq(&tasklist_lock);
-               }
-       }
 
+zap_again:
+       list_for_each_safe(_p, _n, &current->children)
+               zap_thread(list_entry(_p,struct task_struct,sibling), current);
+       list_for_each_safe(_p, _n, &current->ptrace_children)
+               zap_thread(list_entry(_p,struct task_struct,ptrace_list), current);
+       /*
+        * zap_thread might drop the tasklist lock, thus we could
+        * have new children queued back from the ptrace list into the
+        * child list:
+        */
+       if (unlikely(!list_empty(&current->children) ||
+                       !list_empty(&current->ptrace_children)))
+               goto zap_again;
        /*
         * No need to unlock IRQs, we'll schedule() immediately
         * anyway. In the preemption case this also makes it
@@ -623,6 +662,12 @@ repeat:
                                if (p->pgrp != -pid)
                                        continue;
                        }
+                       /*
+                        * Do not consider detached threads that are
+                        * not ptraced:
+                        */
+                       if (p->exit_signal == -1 && !p->ptrace)
+                               continue;
                        /* Wait for all children (clone and not) if __WALL is set;
                         * otherwise, wait for clone children *only* if __WCLONE is
                         * set; otherwise, wait for non-clone children *only*.  (Note:
@@ -667,7 +712,7 @@ repeat:
                                if (retval)
                                        goto end_wait4; 
                                retval = p->pid;
-                               if (p->real_parent != p->parent) {
+                               if (p->real_parent != p->parent || p->ptrace) {
                                        write_lock_irq(&tasklist_lock);
                                        remove_parent(p);
                                        p->parent = p->real_parent;
index 6d0ec09abe0c41254f92288e0dbf3d7ddd195f0b..f13f6ae9e457cff405018ea2ce80e29346ffc14c 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/fs.h>
 #include <linux/security.h>
 #include <linux/futex.h>
+#include <linux/ptrace.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -808,6 +809,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         */
        p->tgid = p->pid;
        INIT_LIST_HEAD(&p->thread_group);
+       INIT_LIST_HEAD(&p->ptrace_children);
+       INIT_LIST_HEAD(&p->ptrace_list);
 
        /* Need tasklist lock for parent etc handling! */
        write_lock_irq(&tasklist_lock);
@@ -827,6 +830,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        }
 
        SET_LINKS(p);
+       ptrace_link(p, p->parent);
        hash_pid(p);
        nr_threads++;
        write_unlock_irq(&tasklist_lock);
index 97222b936a3e929eabeec24414df5b122844a4f3..8db5da3f834940d20b5ef064d82478f8d08e10c6 100644 (file)
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
 #include <linux/smp_lock.h>
+#include <linux/ptrace.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
 
+/*
+ * ptrace a task: make the debugger its new parent and
+ * move it to the ptrace list.
+ *
+ * The child is queued on its current parent's ptrace_children list
+ * (through child->ptrace_list) before it is moved under new_parent,
+ * so the previous parent can still find it.
+ *
+ * If new_parent already is the child's parent there is nothing to
+ * relink: the child stays off the ptrace list and the call is a
+ * no-op. This is the case when a task attaches to its own child.
+ *
+ * Must be called with the tasklist lock write-held.
+ */
+void __ptrace_link(task_t *child, task_t *new_parent)
+{
+       /* linking a task that already sits on a ptrace list is a bug */
+       if (!list_empty(&child->ptrace_list))
+               BUG();
+       /* not an error: the tracer may already be the task's parent */
+       if (child->parent == new_parent)
+               return;
+       list_add(&child->ptrace_list, &child->parent->ptrace_children);
+       REMOVE_LINKS(child);
+       child->parent = new_parent;
+       SET_LINKS(child);
+}
+/*
+ * Stop tracing a task: clear its ptrace state and, if it is queued
+ * on a ptrace list, take it off that list and put it back under its
+ * real parent.
+ *
+ * Calling this for a task whose ptrace field is zero is a bug.
+ *
+ * Must be called with the tasklist lock write-held.
+ */
+void __ptrace_unlink(task_t *child)
+{
+       if (!child->ptrace)
+               BUG();
+       child->ptrace = 0;
+
+       /* only a task that was actually relinked needs moving back */
+       if (!list_empty(&child->ptrace_list)) {
+               list_del_init(&child->ptrace_list);
+               REMOVE_LINKS(child);
+               child->parent = child->real_parent;
+               SET_LINKS(child);
+       }
+}
+
 /*
  * Check that we have indeed attached to the thing..
  */
@@ -75,11 +113,7 @@ int ptrace_attach(struct task_struct *task)
        task_unlock(task);
 
        write_lock_irq(&tasklist_lock);
-       if (task->parent != current) {
-               REMOVE_LINKS(task);
-               task->parent = current;
-               SET_LINKS(task);
-       }
+       __ptrace_link(task, current);
        write_unlock_irq(&tasklist_lock);
 
        send_sig(SIGSTOP, task, 1);
@@ -99,16 +133,15 @@ int ptrace_detach(struct task_struct *child, unsigned int data)
        ptrace_disable(child);
 
        /* .. re-parent .. */
-       child->ptrace = 0;
        child->exit_code = data;
+
        write_lock_irq(&tasklist_lock);
-       REMOVE_LINKS(child);
-       child->parent = child->real_parent;
-       SET_LINKS(child);
+       __ptrace_unlink(child);
+       /* .. and wake it up. */
+       if (child->state != TASK_ZOMBIE)
+               wake_up_process(child);
        write_unlock_irq(&tasklist_lock);
 
-       /* .. and wake it up. */
-       wake_up_process(child);
        return 0;
 }