]> git.neil.brown.name Git - history.git/commitdiff
[PATCH] prepare_to_wait/finish_wait sleep/wakeup API
author: Andrew Morton <akpm@digeo.com>
Wed, 25 Sep 2002 14:20:08 +0000 (07:20 -0700)
committer: Linus Torvalds <torvalds@home.transmeta.com>
Wed, 25 Sep 2002 14:20:08 +0000 (07:20 -0700)
This is worth a whopping 2% on specweb on an 8-way.  Which is faintly
surprising because __wake_up and other wait/wakeup functions are not
apparent in the specweb profiles which I've seen.

The main objective of this is to reduce the CPU cost of the wait/wakeup
operation.  When a task is woken up, its waitqueue is removed from the
waitqueue_head by the waker (ie: immediately), rather than by the woken
process.

This means that a subsequent wakeup does not need to revisit the
just-woken task.  It also means that the just-woken task does not need
to take the waitqueue_head's lock, which may well reside in another
CPU's cache.

I have no decent measurements on the effect of this change - possibly a
20-30% drop in __wake_up cost in Badari's 40-dds-to-40-disks test (it
was the most expensive function), but it's inconclusive.  And no
quantitative testing of which I am aware has been performed by
networking people.

The API is very simple to use (Linus thought it up):

    my_func(wait_queue_head_t *wqh)
    {
            DEFINE_WAIT(wait);

            prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
            if (!some_test)
                    schedule();
            finish_wait(wqh, &wait);
    }

or:

    DEFINE_WAIT(wait);

    while (!some_test_1) {
            prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
            if (!some_test_2)
                    schedule();
            ...
    }
    finish_wait(wqh, &wait);

You need to bear in mind that once prepare_to_wait has been performed,
your task could be removed from the waitqueue_head and placed into
TASK_RUNNING at any time.  You don't know whether or not you're still
on the waitqueue_head.

Running prepare_to_wait() when you're already on the waitqueue_head is
fine - it will do the right thing.

Running finish_wait() when you're actually not on the waitqueue_head is
fine.

Running finish_wait() when you've _never_ been on the waitqueue_head is
fine, as long as the DEFINE_WAIT() macro was used to initialise the
waitqueue.

You don't need to fiddle with current->state.  prepare_to_wait() and
finish_wait() will do that.  finish_wait() will always return in state
TASK_RUNNING.

There are plenty of usage examples in vm-wakeups.patch and
tcp-wakeups.patch.

include/linux/wait.h
kernel/fork.c
kernel/ksyms.c

index 8664b02f230da8fec4a99154f4d7fe5c94a9ecbb..b6ce459f879238dbe7584ae04d807ca0e58074d4 100644 (file)
@@ -119,6 +119,32 @@ static inline void __remove_wait_queue(wait_queue_head_t *head,
                _raced;                                         \
        })
 
+/*
+ * Wait-queue entries which are removed from the waitqueue_head at wakeup
+ * time (by the waker) rather than by the woken task.
+ *
+ * prepare_to_wait() and prepare_to_wait_exclusive() set the task state and
+ * queue the entry if it is not already queued.  finish_wait() sets the
+ * state back to TASK_RUNNING and unlinks the entry if it is still queued.
+ * autoremove_wake_function() is the wake callback which unlinks the entry
+ * when the wakeup succeeds.
+ */
+void FASTCALL(prepare_to_wait(wait_queue_head_t *q,
+                               wait_queue_t *wait, int state));
+void FASTCALL(prepare_to_wait_exclusive(wait_queue_head_t *q,
+                               wait_queue_t *wait, int state));
+void FASTCALL(finish_wait(wait_queue_head_t *q, wait_queue_t *wait));
+int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync);
+
+/*
+ * DEFINE_WAIT - declare and initialise a wait_queue_t called @name.
+ *
+ * The entry is bound to `current`, uses autoremove_wake_function as its
+ * wake callback, and has its task_list pointing at itself (i.e. an empty
+ * list).  The self-pointing list is what makes it safe to call
+ * finish_wait() on an entry which was never queued.
+ */
+#define DEFINE_WAIT(name)                                              \
+       wait_queue_t name = {                                           \
+               .task           = current,                              \
+               .func           = autoremove_wake_function,             \
+               .task_list      = {     .next = &name.task_list,        \
+                                       .prev = &name.task_list,        \
+                               },                                      \
+       }
+
+#define init_wait(wait)                                                        \
+       do {                                                            \
+               wait->task = current;                                   \
+               wait->func = autoremove_wake_function;                  \
+               INIT_LIST_HEAD(&wait->task_list);                       \
+       } while (0)
+       
 #endif /* __KERNEL__ */
 
 #endif
index 062a4d1f9c3e00f50f0209a9555dbac12205635f..5880309f3fee1204b8369c3fc28f309385aea832 100644 (file)
@@ -103,6 +103,52 @@ void remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
        spin_unlock_irqrestore(&q->lock, flags);
 }
 
+/*
+ * prepare_to_wait - set the task state and queue a non-exclusive waiter.
+ * @q: waitqueue head to wait on
+ * @wait: wait-queue entry to queue (e.g. one declared with DEFINE_WAIT)
+ * @state: state to put the current task into
+ *
+ * May be called when @wait is already queued: the entry is only added
+ * when its task_list is empty.
+ */
+void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
+{
+       unsigned long flags;
+
+       __set_current_state(state);
+       /* This variant always queues a non-exclusive waiter. */
+       wait->flags &= ~WQ_FLAG_EXCLUSIVE;
+       spin_lock_irqsave(&q->lock, flags);
+       /* An empty task_list means the entry is not currently linked. */
+       if (list_empty(&wait->task_list))
+               __add_wait_queue(q, wait);
+       spin_unlock_irqrestore(&q->lock, flags);
+}
+
+/*
+ * prepare_to_wait_exclusive - set the task state and queue an exclusive
+ * waiter.
+ * @q: waitqueue head to wait on
+ * @wait: wait-queue entry to queue (e.g. one declared with DEFINE_WAIT)
+ * @state: state to put the current task into
+ *
+ * Same as prepare_to_wait() except that WQ_FLAG_EXCLUSIVE is set on the
+ * entry and it is queued with __add_wait_queue_tail().  The entry is only
+ * added when its task_list is empty, so calling this while already queued
+ * does not queue it twice.
+ */
+void
+prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
+{
+       unsigned long flags;
+
+       __set_current_state(state);
+       wait->flags |= WQ_FLAG_EXCLUSIVE;
+       spin_lock_irqsave(&q->lock, flags);
+       /* An empty task_list means the entry is not currently linked. */
+       if (list_empty(&wait->task_list))
+               __add_wait_queue_tail(q, wait);
+       spin_unlock_irqrestore(&q->lock, flags);
+}
+
+/*
+ * finish_wait - end a wait started with prepare_to_wait*().
+ * @q: waitqueue head the entry was (possibly) queued on
+ * @wait: wait-queue entry to clean up
+ *
+ * Sets the current task back to TASK_RUNNING and unlinks @wait from @q if
+ * it is still linked.  Does nothing to the list if the entry is already
+ * unlinked, which covers both "removed by the waker" and "never queued"
+ * (provided the entry's task_list was initialised as an empty list).
+ */
+void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
+{
+       unsigned long flags;
+
+       __set_current_state(TASK_RUNNING);
+       /*
+        * Checked without q->lock so that a task whose entry has already
+        * been unlinked does not need to take the lock at all.
+        * NOTE(review): this unlocked test can run concurrently with a
+        * waker's list_del_init() - confirm that is safe on all arches.
+        */
+       if (!list_empty(&wait->task_list)) {
+               spin_lock_irqsave(&q->lock, flags);
+               list_del_init(&wait->task_list);
+               spin_unlock_irqrestore(&q->lock, flags);
+       }
+}
+
+/*
+ * autoremove_wake_function - wake callback which unlinks the entry on a
+ * successful wakeup.
+ * @wait: wait-queue entry being woken
+ * @mode: passed through to default_wake_function()
+ * @sync: passed through to default_wake_function()
+ *
+ * Returns whatever default_wake_function() returned.  When that value is
+ * non-zero the entry is removed from its list and re-initialised as an
+ * empty list, so a later finish_wait() sees it as already unlinked.
+ * Presumably called with the waitqueue_head lock held by the waker, since
+ * no lock is taken here - confirm against the __wake_up path.
+ */
+int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync)
+{
+       int ret = default_wake_function(wait, mode, sync);
+
+       /* Only unlink when the wakeup reported success. */
+       if (ret)
+               list_del_init(&wait->task_list);
+       return ret;
+}
+
 void __init fork_init(unsigned long mempages)
 {
        /* create a slab on which task_structs can be allocated */
index 557ae8f7ded2b05cd89319c8997af9b968e81ded..cd69b97e8e9d1affc03a16ae53f89ba583465227 100644 (file)
@@ -400,6 +400,10 @@ EXPORT_SYMBOL(irq_stat);
 EXPORT_SYMBOL(add_wait_queue);
 EXPORT_SYMBOL(add_wait_queue_exclusive);
 EXPORT_SYMBOL(remove_wait_queue);
+/* prepare_to_wait/finish_wait sleep/wakeup API */
+EXPORT_SYMBOL(prepare_to_wait);
+EXPORT_SYMBOL(prepare_to_wait_exclusive);
+EXPORT_SYMBOL(finish_wait);
+EXPORT_SYMBOL(autoremove_wake_function);
 
 /* completion handling */
 EXPORT_SYMBOL(wait_for_completion);