DO_ACTION( mask, 0, |= 0x00010000, io_apic_sync()) /* mask = 1 */
DO_ACTION( unmask, 0, &= 0xfffeffff, ) /* mask = 0 */
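/* bit 16 of the low dword of a redirection entry is the per-pin mask bit */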
-static void __init clear_IO_APIC_pin(unsigned int pin)
+static void clear_IO_APIC_pin(unsigned int pin)
{
struct IO_APIC_route_entry entry;
/* write a masked (disabled) entry to the redirection table: */
memset(&entry, 0, sizeof(entry));
entry.mask = 1;
io_apic_write(0x10 + 2 * pin, *(((int *)&entry) + 0));
io_apic_write(0x11 + 2 * pin, *(((int *)&entry) + 1));
}
+static void clear_IO_APIC (void)
+{
+ int pin;
+
+ for (pin = 0; pin < nr_ioapic_registers; pin++)
+ clear_IO_APIC_pin(pin);
+}
/*
 * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
 * specific CPU-side IRQs.
 */
/*
* Set up a certain pin as ExtINT delivered interrupt
*/
-void __init setup_ExtINT_pin(unsigned int pin)
+void __init setup_ExtINT_pin(unsigned int pin, int irq)
{
struct IO_APIC_route_entry entry;
memset(&entry,0,sizeof(entry));
entry.delivery_mode = dest_ExtINT;
- entry.dest_mode = 1; /* logical delivery */
+ entry.dest_mode = 0; /* physical delivery */
entry.mask = 0; /* unmask IRQ now */
/*
- * Careful with this one. We do not use 'true' logical
- * delivery, as we set local APICs to LDR == 0. But
- * 0xff logical destination is special (broadcast).
- * Any other combination will cause problems.
+ * We use physical delivery to get the timer IRQ
+ * to the boot CPU. 'boot_cpu_id' is the physical
+ * APIC ID of the boot CPU.
*/
- entry.dest.logical.logical_dest = 0xff;
+ entry.dest.physical.physical_dest = boot_cpu_id;
- entry.vector = 0; /* it's ignored */
+ entry.vector = assign_irq_vector(irq);
entry.polarity = 0;
entry.trigger = 0;
static void __init init_sym_mode(void)
{
- int i, pin;
+ int i;
for (i = 0; i < PIN_MAP_SIZE; i++) {
irq_2_pin[i].pin = -1;
/*
* Do not trust the IO-APIC being empty at bootup
*/
- for (pin = 0; pin < nr_ioapic_registers; pin++)
- clear_IO_APIC_pin(pin);
+ clear_IO_APIC();
}
/*
*/
void init_pic_mode(void)
{
+ /*
+ * Clear the IO-APIC before rebooting:
+ */
+ clear_IO_APIC();
+
+ /*
+ * Put it back into PIC mode (has an effect only on
+ * certain boards)
+ */
printk("disabling symmetric IO mode... ");
outb_p(0x70, 0x22);
outb_p(0x00, 0x23);
if (pin2 != -1) {
printk(".. (found pin %d) ...", pin2);
- setup_ExtINT_pin(pin2);
+ setup_ExtINT_pin(pin2, 0);
make_8259A_irq(0);
}
val = K_HOLE;
} else
val = (i ? K_HOLE : K_NOSUCHMAP);
- return __put_user(val, &user_kbe->kb_value);
+ return put_user(val, &user_kbe->kb_value);
case KDSKBENT:
if (!perm)
return -EPERM;
case KDGETKEYCODE:
kc = getkeycode(tmp.scancode);
if (kc >= 0)
- kc = __put_user(kc, &user_kbkc->keycode);
+ kc = put_user(kc, &user_kbkc->keycode);
break;
case KDSETKEYCODE:
if (!perm)
p = func_table[i];
if(p)
for ( ; *p && sz; p++, sz--)
- __put_user(*p, q++);
- __put_user('\0', q);
+ put_user(*p, q++);
+ put_user('\0', q);
return ((p && *p) ? -EOVERFLOW : 0);
case KDSKBSENT:
if (!perm)
{
struct kbdiacrs *a = (struct kbdiacrs *)arg;
- i = verify_area(VERIFY_WRITE, (void *) a, sizeof(struct kbdiacrs));
- if (i)
- return i;
- __put_user(accent_table_size, &a->kb_cnt);
- __copy_to_user(a->kbdiacr, accent_table,
- accent_table_size*sizeof(struct kbdiacr));
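+ /*
+ * put_user()/copy_to_user() do their own access checking, so the
+ * separate verify_area() call is no longer needed here.
+ */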
+ if (put_user(accent_table_size, &a->kb_cnt))
+ return -EFAULT;
+ if (copy_to_user(a->kbdiacr, accent_table, accent_table_size*sizeof(struct kbdiacr)))
+ return -EFAULT;
return 0;
}
if (!perm)
return -EPERM;
- i = verify_area(VERIFY_READ, (void *) a, sizeof(struct kbdiacrs));
- if (i)
- return i;
- __get_user(ct,&a->kb_cnt);
+ if (get_user(ct,&a->kb_cnt))
+ return -EFAULT;
if (ct >= MAX_DIACR)
return -EINVAL;
accent_table_size = ct;
- __copy_from_user(accent_table, a->kbdiacr, ct*sizeof(struct kbdiacr));
+ if (copy_from_user(accent_table, a->kbdiacr, ct*sizeof(struct kbdiacr)))
+ return -EFAULT;
return 0;
}
i = verify_area(VERIFY_WRITE,(void *)vtstat, sizeof(struct vt_stat));
if (i)
return i;
- __put_user(fg_console + 1, &vtstat->v_active);
+ put_user(fg_console + 1, &vtstat->v_active);
state = 1; /* /dev/tty0 is always open */
for (i = 0, mask = 2; i < MAX_NR_CONSOLES && mask; ++i, mask <<= 1)
if (VT_IS_IN_USE(i))
state |= mask;
- return __put_user(state, &vtstat->v_state);
+ return put_user(state, &vtstat->v_state);
}
/*
i = verify_area(VERIFY_READ, (void *)vtsizes, sizeof(struct vt_sizes));
if (i)
return i;
- __get_user(ll, &vtsizes->v_rows);
- __get_user(cc, &vtsizes->v_cols);
+ get_user(ll, &vtsizes->v_rows);
+ get_user(cc, &vtsizes->v_cols);
return vc_resize_all(ll, cc);
}
i = verify_area(VERIFY_READ, (void *)vtconsize, sizeof(struct vt_consize));
if (i)
return i;
- __get_user(ll, &vtconsize->v_rows);
- __get_user(cc, &vtconsize->v_cols);
- __get_user(vlin, &vtconsize->v_vlin);
- __get_user(clin, &vtconsize->v_clin);
- __get_user(vcol, &vtconsize->v_vcol);
- __get_user(ccol, &vtconsize->v_ccol);
+ get_user(ll, &vtconsize->v_rows);
+ get_user(cc, &vtconsize->v_cols);
+ get_user(vlin, &vtconsize->v_vlin);
+ get_user(clin, &vtconsize->v_clin);
+ get_user(vcol, &vtconsize->v_vcol);
+ get_user(ccol, &vtconsize->v_ccol);
vlin = vlin ? vlin : video_scan_lines;
if ( clin )
{
/*
* Dispose-list gets a local list, so it doesn't need to
- * worry about list corruption.
+ * worry about list corruption. It releases the inode lock
+ * while clearing the inodes.
*/
static void dispose_list(struct list_head * head)
{
struct list_head *next;
int count = 0;
+ spin_unlock(&inode_lock);
next = head->next;
for (;;) {
struct list_head * tmp = next;
spin_lock(&inode_lock);
list_splice(head, &inode_unused);
inodes_stat.nr_free_inodes += count;
- spin_unlock(&inode_lock);
}
/*
spin_lock(&inode_lock);
busy = invalidate_list(&inode_in_use, sb, &throw_away);
busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
- spin_unlock(&inode_lock);
-
dispose_list(&throw_away);
+ spin_unlock(&inode_lock);
return busy;
}
/*
* This is called with the inode lock held. It searches
- * the in-use for the specified number of freeable inodes.
- * Freeable inodes are moved to a temporary list and then
- * placed on the unused list by dispose_list.
+ * the in-use list for freeable inodes, which are moved to a
+ * temporary list and then placed on the unused list by
+ * dispose_list.
+ *
+ * We don't expect to have to call this very often.
*
- * Note that we do not expect to have to search very hard:
- * the freeable inodes will be at the old end of the list.
- *
- * N.B. The spinlock is released to call dispose_list.
+ * N.B. The spinlock is released during the call to
+ * dispose_list.
*/
#define CAN_UNUSE(inode) \
- (((inode)->i_count == 0) && \
- (!(inode)->i_state))
+ (((inode)->i_count | (inode)->i_state) == 0)
+#define INODE(entry) (list_entry(entry, struct inode, i_list))
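+/*
+ * An inode is freeable only when i_count and i_state are both zero;
+ * OR-ing the two fields lets CAN_UNUSE test both with one compare.
+ */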
-static int free_inodes(int goal)
+static int free_inodes(void)
{
- struct list_head *tmp, *head = &inode_in_use;
- LIST_HEAD(freeable);
- int found = 0, depth = goal << 1;
+ struct list_head list, *entry, *freeable = &list;
+ int found = 0;
- while ((tmp = head->prev) != head && depth--) {
- struct inode * inode = list_entry(tmp, struct inode, i_list);
+ INIT_LIST_HEAD(freeable);
+ entry = inode_in_use.next;
+ while (entry != &inode_in_use) {
+ struct list_head *tmp = entry;
+
+ entry = entry->next;
+ if (!CAN_UNUSE(INODE(tmp)))
+ continue;
list_del(tmp);
- if (CAN_UNUSE(inode)) {
- list_del(&inode->i_hash);
- INIT_LIST_HEAD(&inode->i_hash);
- list_add(tmp, &freeable);
- if (++found < goal)
- continue;
- break;
- }
- list_add(tmp, head);
+ list_del(&INODE(tmp)->i_hash);
+ INIT_LIST_HEAD(&INODE(tmp)->i_hash);
+ list_add(tmp, freeable);
+ found = 1;
}
+
if (found) {
- spin_unlock(&inode_lock);
- dispose_list(&freeable);
- spin_lock(&inode_lock);
+ dispose_list(freeable);
+ found = 1; /* silly compiler */
}
+
return found;
}
static void try_to_free_inodes(int goal)
{
shrink_dentry_inodes(goal);
- if (!free_inodes(goal))
+ if (!free_inodes())
shrink_dentry_inodes(goal);
}
void free_inode_memory(int goal)
{
spin_lock(&inode_lock);
- free_inodes(goal);
+ free_inodes();
spin_unlock(&inode_lock);
}
inodes_stat.preshrink = 1;
spin_lock(&inode_lock);
- free_inodes(inodes_stat.nr_inodes >> 2);
+ free_inodes();
{
struct list_head *tmp = inode_unused.next;
if (tmp != &inode_unused) {
unsigned long rss, total_vm, locked_vm;
unsigned long def_flags;
unsigned long cpu_vm_mask;
+ unsigned long swap_cnt; /* number of pages to swap on next pass */
+ unsigned long swap_address;
/*
* This is an architecture-specific pointer: the portable
* part of Linux does not know about any segments.
0, 0, 0, \
0, 0, 0, 0, \
0, 0, 0, \
- 0, 0, NULL }
+ 0, 0, 0, 0, NULL }
struct signal_struct {
atomic_t count;
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
int swappable:1;
- unsigned long swap_address;
- unsigned long swap_cnt; /* number of pages to swap on next pass */
/* process credentials */
uid_t uid,euid,suid,fsuid;
gid_t gid,egid,sgid,fsgid;
/* utime */ {0,0,0,0},0, \
/* per CPU times */ {0, }, {0, }, \
/* flt */ 0,0,0,0,0,0, \
-/* swp */ 0,0,0, \
+/* swp */ 0, \
/* process credentials */ \
/* uid etc */ 0,0,0,0,0,0,0,0, \
/* suppl grps*/ 0, {0,}, \
static int init(void *);
extern int bdflush(void *);
extern int kswapd(void *);
+extern int kpiod(void *);
extern void kswapd_setup(void);
extern void init_IRQ(void);
kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
/* Start the background pageout daemon. */
kswapd_setup();
+ kernel_thread(kpiod, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
kernel_thread(kswapd, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
#if CONFIG_AP1000
EXPORT_SYMBOL(update_vm_cache);
EXPORT_SYMBOL(vmtruncate);
EXPORT_SYMBOL(find_vma);
+EXPORT_SYMBOL(get_unmapped_area);
/* filesystem internal functions */
EXPORT_SYMBOL(in_group_p);
sched_data->prevstate = prev->state;
+/* this is the scheduler proper: */
{
struct task_struct * p = init_task.next_run;
+ int c = -1000;
+
+ /* Default process to select.. */
+ next = idle_task;
+ if (prev->state == TASK_RUNNING) {
+ c = goodness(prev, prev, this_cpu);
+ next = prev;
+ }
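+ /*
+ * Ties go to 'next' as initialized above: in the search below a
+ * task must strictly exceed the best goodness so far, so 'prev'
+ * keeps the CPU when goodness values are equal.
+ */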
+
/*
* This is subtle.
* Note how we can enable interrupts here, even
* the scheduler lock
*/
spin_unlock_irq(&runqueue_lock);
-#ifdef __SMP__
- prev->has_cpu = 0;
-#endif
-
/*
* Note! there may appear new tasks on the run-queue during this, as
* interrupts are enabled. However, they will be put on front of the
* list, so our list starting at "p" is essentially fixed.
*/
-/* this is the scheduler proper: */
- {
- int c = -1000;
- next = idle_task;
- while (p != &init_task) {
- if (can_schedule(p)) {
- int weight = goodness(p, prev, this_cpu);
- if (weight > c)
- c = weight, next = p;
- }
- p = p->next_run;
+ while (p != &init_task) {
+ if (can_schedule(p)) {
+ int weight = goodness(p, prev, this_cpu);
+ if (weight > c)
+ c = weight, next = p;
}
+ p = p->next_run;
+ }
- /* Do we need to re-calculate counters? */
- if (!c) {
- struct task_struct *p;
- read_lock(&tasklist_lock);
- for_each_task(p)
- p->counter = (p->counter >> 1) + p->priority;
- read_unlock(&tasklist_lock);
- }
+ /* Do we need to re-calculate counters? */
+ if (!c) {
+ struct task_struct *p;
+ read_lock(&tasklist_lock);
+ for_each_task(p)
+ p->counter = (p->counter >> 1) + p->priority;
+ read_unlock(&tasklist_lock);
}
}
* thus we have to lock the previous process from getting
* rescheduled during switch_to().
*/
- prev->has_cpu = 1;
-
- next->has_cpu = 1;
next->processor = this_cpu;
+ next->has_cpu = 1;
spin_unlock(&scheduler_lock);
#endif /* __SMP__ */
if (prev != next) {
#include <linux/blkdev.h>
#include <linux/file.h>
#include <linux/swapctl.h>
+#include <linux/slab.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#define release_page(page) __free_page((page))
+/*
+ * Define a request structure for outstanding page write requests
+ * to the background page io daemon
+ */
+
+struct pio_request
+{
+ struct pio_request * next;
+ struct file * file;
+ unsigned long offset;
+ unsigned long page;
+};
+static struct pio_request *pio_first = NULL, **pio_last = &pio_first;
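+/*
+ * pio_first/pio_last form a simple FIFO: pio_last always points at
+ * the link to fill in next (&pio_first while the queue is empty),
+ * which makes appending O(1).
+ */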
+static kmem_cache_t *pio_request_cache;
+static struct wait_queue *pio_wait = NULL;
+
+static inline void
+make_pio_request(struct file *, unsigned long, unsigned long);
+
+
/*
* Invalidate the pages of an inode, removing all pages that aren't
* locked down (those are sure to be up-to-date anyway, so we shouldn't
}
static int filemap_write_page(struct vm_area_struct * vma,
- unsigned long offset,
- unsigned long page)
+ unsigned long offset,
+ unsigned long page,
+ int wait)
{
int result;
struct file * file;
* and file could be released ... increment the count to be safe.
*/
file->f_count++;
+
+ /*
+ * If this is a swapping operation rather than msync(), then
+ * leave the actual IO, and the restoration of the file count,
+ * to the kpiod thread. Just queue the request for now.
+ */
+ if (!wait) {
+ make_pio_request(file, offset, page);
+ return 0;
+ }
+
down(&inode->i_sem);
result = do_write_page(inode, file, (const char *) page, offset);
up(&inode->i_sem);
*/
int filemap_swapout(struct vm_area_struct * vma, struct page * page)
{
- return filemap_write_page(vma, page->offset, page_address(page));
+ return filemap_write_page(vma, page->offset, page_address(page), 0);
}
static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
return 0;
}
}
- error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
+ error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page, 1);
free_page(page);
return error;
}
wake_up(&page->wait);
__free_page(page);
}
+
+
+/* Add request for page IO to the queue */
+
+static inline void put_pio_request(struct pio_request *p)
+{
+ *pio_last = p;
+ p->next = NULL;
+ pio_last = &p->next;
+}
+
+/*
+ * Take the first page IO request off the queue. The caller must
+ * already have checked that the queue is not empty (get_pio_request
+ * does not handle a NULL pio_first).
+ */
+
+static inline struct pio_request * get_pio_request(void)
+{
+ struct pio_request * p = pio_first;
+ pio_first = p->next;
+ if (!pio_first)
+ pio_last = &pio_first;
+ return p;
+}
+
+/* Make a new page IO request and queue it to the kpiod thread */
+
+static inline void make_pio_request(struct file *file,
+ unsigned long offset,
+ unsigned long page)
+{
+ struct pio_request *p;
+
+ atomic_inc(&mem_map[MAP_NR(page)].count);
+
+ /*
+ * We need to allocate without causing any recursive IO in the
+ * current thread's context. We might currently be swapping out
+ * as a result of an allocation made while holding a critical
+ * filesystem lock. To avoid deadlock, we *MUST* not reenter
+ * the filesystem in this thread.
+ *
+ * We can wait for kswapd to free memory, or we can try to free
+ * pages without actually performing further IO, without fear of
+ * deadlock. --sct
+ */
+
+ while ((p = kmem_cache_alloc(pio_request_cache, GFP_BUFFER)) == NULL) {
+ if (try_to_free_pages(__GFP_WAIT))
+ continue;
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(HZ/10);
+ }
+
+ p->file = file;
+ p->offset = offset;
+ p->page = page;
+
+ put_pio_request(p);
+ wake_up(&pio_wait);
+}
+
+
+/*
+ * This is the only thread which is allowed to write out filemap pages
+ * while swapping.
+ *
+ * To avoid deadlock, it is important that we never reenter this thread.
+ * Although recursive memory allocations within this thread may result
+ * in more page swapping, that swapping will always be done by queuing
+ * another IO request to the same thread: we will never actually start
+ * that IO request until we have finished with the current one, and so
+ * we will not deadlock.
+ */
+
+int kpiod(void * unused)
+{
+ struct wait_queue wait = { current, NULL };
+ struct inode * inode;
+ struct dentry * dentry;
+ struct pio_request * p;
+
+ current->session = 1;
+ current->pgrp = 1;
+ strcpy(current->comm, "kpiod");
+ sigfillset(&current->blocked);
+ init_waitqueue(&pio_wait);
+
+ lock_kernel();
+
+ pio_request_cache = kmem_cache_create("pio_request",
+ sizeof(struct pio_request),
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL, NULL);
+ if (!pio_request_cache)
+ panic ("Could not create pio_request slab cache");
+
+ while (1) {
+ current->state = TASK_INTERRUPTIBLE;
+ add_wait_queue(&pio_wait, &wait);
+ while (!pio_first)
+ schedule();
+ remove_wait_queue(&pio_wait, &wait);
+ current->state = TASK_RUNNING;
+
+ while (pio_first) {
+ p = get_pio_request();
+ dentry = p->file->f_dentry;
+ inode = dentry->d_inode;
+
+ down(&inode->i_sem);
+ do_write_page(inode, p->file,
+ (const char *) p->page, p->offset);
+ up(&inode->i_sem);
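+ /*
+ * Drop the file and page references that were taken when the
+ * request was queued in filemap_write_page()/make_pio_request().
+ */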
+ fput(p->file);
+ free_page(p->page);
+ kmem_cache_free(pio_request_cache, p);
+ }
+ }
+}
do {
int result;
- tsk->swap_address = address + PAGE_SIZE;
+ tsk->mm->swap_address = address + PAGE_SIZE;
result = try_to_swap_out(tsk, vma, address, pte, gfp_mask);
if (result)
return result;
/*
* Go through process' page directory.
*/
- address = p->swap_address;
+ address = p->mm->swap_address;
/*
* Find the proper vm-area
}
/* We didn't find anything for the process */
- p->swap_cnt = 0;
- p->swap_address = 0;
+ p->mm->swap_cnt = 0;
+ p->mm->swap_address = 0;
return 0;
}
continue;
/* Refresh swap_cnt? */
if (assign)
- p->swap_cnt = p->mm->rss;
- if (p->swap_cnt > max_cnt) {
- max_cnt = p->swap_cnt;
+ p->mm->swap_cnt = p->mm->rss;
+ if (p->mm->swap_cnt > max_cnt) {
+ max_cnt = p->mm->swap_cnt;
pbest = p;
}
}
for (req = sp->tp_pinfo.af_tcp.syn_wait_queue; req;
i++, req = req->dl_next) {
+ if (req->sk)
+ continue;
pos += 128;
if (pos < offset)
continue;
}
#endif /* CONFIG_FILTER */
- /*
- * socket locking is here for SMP purposes as backlog rcv
- * is currently called with bh processing disabled.
- */
- lock_sock(sk);
-
/*
* This doesn't check if the socket has enough room for the packet.
* Either process the packet _without_ queueing it and then free it,
if (sk->state == TCP_ESTABLISHED) { /* Fast path */
if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
goto reset;
- release_sock(sk);
return 0;
}
nsk = tcp_v4_hnd_req(sk, skb);
if (!nsk)
goto discard;
- lock_sock(nsk);
- release_sock(sk);
+
+ /*
+ * Queue it on the new socket if the new socket is active,
+ * otherwise we just shortcircuit this and continue with
+ * the new socket..
+ */
+ if (atomic_read(&nsk->sock_readers)) {
+ __skb_queue_tail(&nsk->back_log, skb);
+ return 0;
+ }
sk = nsk;
}
if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
goto reset;
- release_sock(sk);
return 0;
reset:
* might be destroyed here. This current version compiles correctly,
* but you have been warned.
*/
- release_sock(sk);
return 0;
}