git.neil.brown.name Git - history.git/commitdiff
[PATCH] fix for potential deadlock after posix-timers change
author Roland McGrath <roland@redhat.com>
Tue, 20 Apr 2004 00:20:06 +0000 (17:20 -0700)
committer Linus Torvalds <torvalds@ppc970.osdl.org>
Tue, 20 Apr 2004 00:20:06 +0000 (17:20 -0700)
Ulrich has been working on the glibc code using posix-timers and
stressing it more now than it has been before.  He ran into an SMP
deadlock on process exit in the case where there are pending queued
signals from a timer.

The deadlock arises because in the path through exit_itimers, the
tasklist_lock is already held (for writing).  When a timer is being
deleted, sigqueue_free will try to take it (for reading) in the case
where that timer has a pending signal queued on somebody's queue.  This
patch avoids the problem by making sure the queues are flushed before
calling exit_itimers, thus ensuring its code path won't try to take
tasklist_lock.

kernel/signal.c

index 0232084c1f65204643760a2db6a87162fb32a971..a9181552a76e0e6c26a3255f605a6467163a06e0 100644 (file)
@@ -352,10 +352,8 @@ void __exit_signal(struct task_struct *tsk)
                if (tsk == sig->curr_target)
                        sig->curr_target = next_thread(tsk);
                tsk->signal = NULL;
-               exit_itimers(sig);
                spin_unlock(&sighand->siglock);
                flush_sigqueue(&sig->shared_pending);
-               kmem_cache_free(signal_cachep, sig);
        } else {
                /*
                 * If there is any task waiting for the group exit
@@ -369,9 +367,28 @@ void __exit_signal(struct task_struct *tsk)
                        sig->curr_target = next_thread(tsk);
                tsk->signal = NULL;
                spin_unlock(&sighand->siglock);
+               sig = NULL;     /* Marker for below.  */
        }
        clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
        flush_sigqueue(&tsk->pending);
+       if (sig) {
+               /*
+                * We are cleaning up the signal_struct here.  We delayed
+                * calling exit_itimers until after flush_sigqueue, just in
+                * case our thread-local pending queue contained a queued
+                * timer signal that would have been cleared in
+                * exit_itimers.  When that called sigqueue_free, it would
+                * attempt to re-take the tasklist_lock and deadlock.  This
+                * can never happen if we ensure that all queues the
+                * timer's signal might be queued on have been flushed
+                * first.  The shared_pending queue, and our own pending
+                * queue are the only queues the timer could be on, since
+                * there are no other threads left in the group and timer
+                * signals are constrained to threads inside the group.
+                */
+               exit_itimers(sig);
+               kmem_cache_free(signal_cachep, sig);
+       }
 }
 
 void exit_signal(struct task_struct *tsk)