git.neil.brown.name Git - history.git/commitdiff
Linux 2.2.19pre2 2.2.19pre2
authorAlan Cox <alan@lxorguk.ukuu.org.uk>
Fri, 23 Nov 2007 20:23:00 +0000 (15:23 -0500)
committerAlan Cox <alan@lxorguk.ukuu.org.uk>
Fri, 23 Nov 2007 20:23:00 +0000 (15:23 -0500)
o Drop the page aging for a moment to merge the
Andrea VM
o Merge Andrea's VM-global patch (Andrea Arcangeli)

20 files changed:
Makefile
drivers/block/rd.c
fs/binfmt_aout.c
fs/binfmt_elf.c
fs/buffer.c
fs/coda/file.c
fs/dcache.c
fs/open.c
fs/read_write.c
include/linux/fs.h
include/linux/locks.h
include/linux/mm.h
include/linux/sched.h
init/main.c
ipc/shm.c
kernel/fork.c
mm/filemap.c
mm/page_alloc.c
mm/swap_state.c
mm/vmscan.c

index f057cc53e5b553dd7fd76d1f1cfc4e83d968e5c3..a751970a0f59425142abd43bb225b812acf0e886 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 2
 SUBLEVEL = 19
-EXTRAVERSION = pre1
+EXTRAVERSION = pre2
 
 ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/)
 
index f1d54b24810aa6ff3a72ebb7e421ff0c3b40bb60..e1d9f1edba38e241c0e9734d0d7cd636292b2b71 100644 (file)
@@ -173,7 +173,7 @@ repeat:
        if (CURRENT->cmd == READ) 
                memset(CURRENT->buffer, 0, len); 
        else    
-               set_bit(BH_Protected, &CURRENT->bh->b_state);
+               mark_buffer_protected(CURRENT->bh);
 
        end_request(1);
        goto repeat;
index 8da1765dd480f38cbb1f2d83c1932680b91b44d6..d56d630462d02fd89786b60869804d22f8f1185e 100644 (file)
@@ -62,9 +62,9 @@ static void set_brk(unsigned long start, unsigned long end)
 static int dump_write(struct file *file, const void *addr, int nr)
 {
        int r;
-       down(&file->f_dentry->d_inode->i_sem);
+       fs_down(&file->f_dentry->d_inode->i_sem);
        r = file->f_op->write(file, addr, nr, &file->f_pos) == nr;
-       up(&file->f_dentry->d_inode->i_sem);
+       fs_up(&file->f_dentry->d_inode->i_sem);
        return r;
 }
 
index 5d5b91b8076c55cd2c24101eec1e620c637026c5..84e9ac54c9f9a8eb7d4ac9b97860d8440b85b40c 100644 (file)
@@ -948,9 +948,9 @@ static int load_elf_library(int fd)
 static int dump_write(struct file *file, const void *addr, int nr)
 {
        int r;
-       down(&file->f_dentry->d_inode->i_sem);
+       fs_down(&file->f_dentry->d_inode->i_sem);
        r = file->f_op->write(file, addr, nr, &file->f_pos) == nr;
-       up(&file->f_dentry->d_inode->i_sem);
+       fs_up(&file->f_dentry->d_inode->i_sem);
        return r;
 }
 
index b59b5b4bb740c430a152772e0c82daab0526d952..3e27c36072f0ef9c09c9add450ad63e2c7d95507 100644 (file)
@@ -83,6 +83,7 @@ static struct wait_queue * buffer_wait = NULL;
 
 static int nr_buffers = 0;
 static int nr_buffers_type[NR_LIST] = {0,};
+static unsigned long size_buffers_type[NR_LIST];
 static int nr_buffer_heads = 0;
 static int nr_unused_buffer_heads = 0;
 static int nr_hashed_buffers = 0;
@@ -359,9 +360,9 @@ asmlinkage int sys_fsync(unsigned int fd)
                goto out_putf;
 
        /* We need to protect against concurrent writers.. */
-       down(&inode->i_sem);
+       fs_down(&inode->i_sem);
        err = file->f_op->fsync(file, dentry);
-       up(&inode->i_sem);
+       fs_up(&inode->i_sem);
 
 out_putf:
        fput(file);
@@ -396,9 +397,9 @@ asmlinkage int sys_fdatasync(unsigned int fd)
                goto out_putf;
 
        /* this needs further work, at the moment it is identical to fsync() */
-       down(&inode->i_sem);
+       fs_down(&inode->i_sem);
        err = file->f_op->fsync(file, dentry);
-       up(&inode->i_sem);
+       fs_up(&inode->i_sem);
 
 out_putf:
        fput(file);
@@ -474,6 +475,7 @@ static void remove_from_queues(struct buffer_head * bh)
                return;
        }
        nr_buffers_type[bh->b_list]--;
+       size_buffers_type[bh->b_list] -= bh->b_size;
        remove_from_hash_queue(bh);
        remove_from_lru_list(bh);
 }
@@ -523,6 +525,7 @@ static void insert_into_queues(struct buffer_head * bh)
                (*bhp)->b_prev_free = bh;
 
                nr_buffers_type[bh->b_list]++;
+               size_buffers_type[bh->b_list] += bh->b_size;
 
                /* Put the buffer in new hash-queue if it has a device. */
                bh->b_next = NULL;
@@ -571,8 +574,10 @@ struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
 {
        struct buffer_head * bh;
        bh = find_buffer(dev,block,size);
-       if (bh)
+       if (bh) {
                bh->b_count++;
+               touch_buffer(bh);
+       }
        return bh;
 }
 
@@ -816,6 +821,46 @@ static inline void file_buffer(struct buffer_head *bh, int list)
        insert_into_queues(bh);
 }
 
+/* -1 -> no need to flush
+    0 -> async flush
+    1 -> sync flush (wait for I/O completation) */
+static int balance_dirty_state(kdev_t dev)
+{
+       unsigned long dirty, tot, hard_dirty_limit, soft_dirty_limit;
+
+       dirty = size_buffers_type[BUF_DIRTY] >> PAGE_SHIFT;
+       tot = (buffermem >> PAGE_SHIFT) + nr_free_pages;
+       tot -= size_buffers_type[BUF_PROTECTED] >> PAGE_SHIFT;
+
+       dirty *= 200;
+       soft_dirty_limit = tot * bdf_prm.b_un.nfract;
+       hard_dirty_limit = soft_dirty_limit * 2;
+
+       if (dirty > soft_dirty_limit)
+       {
+               if (dirty > hard_dirty_limit)
+                       return 1;
+               return 0;
+       }
+       return -1;
+}
+
+/*
+ * if a new dirty buffer is created we need to balance bdflush.
+ *
+ * in the future we might want to make bdflush aware of different
+ * pressures on different devices - thus the (currently unused)
+ * 'dev' parameter.
+ */
+void balance_dirty(kdev_t dev)
+{
+       int state = balance_dirty_state(dev);
+
+       if (state < 0)
+               return;
+       wakeup_bdflush(state);
+}
+
 /*
  * A buffer may need to be moved from one buffer list to another
  * (e.g. in case it is not shared any more). Handle this.
@@ -828,7 +873,9 @@ void refile_buffer(struct buffer_head * buf)
                printk("Attempt to refile free buffer\n");
                return;
        }
-       if (buffer_dirty(buf))
+       if (buffer_protected(buf))
+               dispose = BUF_PROTECTED;
+       else if (buffer_dirty(buf))
                dispose = BUF_DIRTY;
        else if (buffer_locked(buf))
                dispose = BUF_LOCKED;
@@ -837,13 +884,7 @@ void refile_buffer(struct buffer_head * buf)
        if(dispose != buf->b_list) {
                file_buffer(buf, dispose);
                if(dispose == BUF_DIRTY) {
-                       int too_many = (nr_buffers * bdf_prm.b_un.nfract/100);
-
-                       /* This buffer is dirty, maybe we need to start flushing.
-                        * If too high a percentage of the buffers are dirty...
-                        */
-                       if (nr_buffers_type[BUF_DIRTY] > too_many)
-                               wakeup_bdflush(1);
+                       balance_dirty(buf->b_dev);
 
                        /* If this is a loop device, and
                         * more than half of the buffers are dirty...
@@ -864,7 +905,6 @@ void __brelse(struct buffer_head * buf)
        /* If dirty, mark the time this buffer should be written back. */
        set_writetime(buf, 0);
        refile_buffer(buf);
-       touch_buffer(buf);
 
        if (buf->b_count) {
                buf->b_count--;
@@ -1457,6 +1497,7 @@ static int grow_buffers(int size)
        }
        tmp->b_this_page = bh;
        free_list[isize] = bh;
+       mem_map[MAP_NR(page)].flags = 0;
        mem_map[MAP_NR(page)].buffers = bh;
        buffermem += PAGE_SIZE;
        return 1;
@@ -1468,33 +1509,34 @@ static int grow_buffers(int size)
 #define BUFFER_BUSY_BITS       ((1<<BH_Dirty) | (1<<BH_Lock) | (1<<BH_Protected))
 #define buffer_busy(bh)                ((bh)->b_count || ((bh)->b_state & BUFFER_BUSY_BITS))
 
-static int sync_page_buffers(struct page * page, int wait)
+static void sync_page_buffers(struct page * page)
 {
-       struct buffer_head * bh = page->buffers;
-       struct buffer_head * tmp = bh;
+       struct buffer_head * tmp, * bh = page->buffers;
 
+       /*
+        * Here we'll probably sleep and so we must make sure that
+        * the page doesn't go away from under us. We also prefer any
+        * concurrent try_to_free_buffers() not to work in any way on
+        * our current page from under us since we're just working on it.
+        * As always in 2.2.x we're serialized by the big kernel lock
+        * during those hacky page-visibility manipulations.
+        *
+        * SUBTLE NOTE: for things like LVM snapshotting WRITEA will block too!
+        */
        page->buffers = NULL;
 
+       tmp = bh;
        do {
                struct buffer_head *p = tmp;
                tmp = tmp->b_this_page;
-               if (buffer_locked(p)) {
-                       if (wait)
-                               __wait_on_buffer(p);
-               } else if (buffer_dirty(p))
-                       ll_rw_block(WRITE, 1, &p);
-       } while (tmp != bh);
 
-       page->buffers = bh;
-
-       do {
-               struct buffer_head *p = tmp;
-               tmp = tmp->b_this_page;
-               if (buffer_busy(p))
-                       return 1;
+               if (buffer_dirty(p))
+                       if (test_and_set_bit(BH_Wait_IO, &p->b_state))
+                               ll_rw_block(WRITE, 1, &p);
        } while (tmp != bh);
 
-       return 0;
+       /* Restore the visibility of the page before returning. */
+       page->buffers = bh;
 }
 
 /*
@@ -1504,10 +1546,9 @@ static int sync_page_buffers(struct page * page, int wait)
  * Wake up bdflush() if this fails - if we're running low on memory due
  * to dirty buffers, we need to flush them out as quickly as possible.
  */
-int try_to_free_buffers(struct page * page_map, int wait)
+int try_to_free_buffers(struct page * page_map, int gfp_mask)
 {
        struct buffer_head * tmp, * bh = page_map->buffers;
-       int too_many;
 
        tmp = bh;
        do {
@@ -1516,8 +1557,6 @@ int try_to_free_buffers(struct page * page_map, int wait)
                tmp = tmp->b_this_page;
        } while (tmp != bh);
 
- succeed:
-       tmp = bh;
        do {
                struct buffer_head * p = tmp;
                tmp = tmp->b_this_page;
@@ -1536,25 +1575,12 @@ int try_to_free_buffers(struct page * page_map, int wait)
        return 1;
 
  busy:
-       too_many = (nr_buffers * bdf_prm.b_un.nfract/100);
+       if (gfp_mask & __GFP_IO)
+               sync_page_buffers(page_map);
 
-       if (!sync_page_buffers(page_map, wait)) {
-
-               /* If a high percentage of the buffers are dirty, 
-                * wake kflushd 
-                */
-               if (nr_buffers_type[BUF_DIRTY] > too_many)
-                       wakeup_bdflush(0);
-                       
-               /*
-                * We can jump after the busy check because
-                * we rely on the kernel lock.
-                */
-               goto succeed;
-       }
-
-       if(nr_buffers_type[BUF_DIRTY] > too_many)
+       if (balance_dirty_state(NODEV) >= 0)
                wakeup_bdflush(0);
+
        return 0;
 }
 
@@ -1566,7 +1592,7 @@ void show_buffers(void)
        int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
        int protected = 0;
        int nlist;
-       static char *buf_types[NR_LIST] = {"CLEAN","LOCKED","DIRTY"};
+       static char *buf_types[NR_LIST] = {"CLEAN","LOCKED","DIRTY","PROTECTED",};
 
        printk("Buffer memory:   %8ldkB\n",buffermem>>10);
        printk("Buffer heads:    %6d\n",nr_buffer_heads);
@@ -1590,7 +1616,7 @@ void show_buffers(void)
                        used++, lastused = found;
                bh = bh->b_next_free;
          } while (bh != lru_list[nlist]);
-         printk("%8s: %d buffers, %d used (last=%d), "
+         printk("%9s: %d buffers, %d used (last=%d), "
                 "%d locked, %d protected, %d dirty\n",
                 buf_types[nlist], found, used, lastused,
                 locked, protected, dirty);
@@ -1935,7 +1961,8 @@ int bdflush(void * unused)
                
                /* If there are still a lot of dirty buffers around, skip the sleep
                   and flush some more */
-               if(ndirty == 0 || nr_buffers_type[BUF_DIRTY] <= nr_buffers * bdf_prm.b_un.nfract/100) {
+               if (!ndirty || balance_dirty_state(NODEV) < 0)
+               {
                        spin_lock_irq(&current->sigmask_lock);
                        flush_signals(current);
                        spin_unlock_irq(&current->sigmask_lock);
index 46303344615da8efd060603b52982aa8ca72a6da..35967edfc9c9650f86e3e27d67e602ea55461173 100644 (file)
@@ -190,10 +190,10 @@ static ssize_t coda_file_write(struct file *coda_file, const char *buff,
                 return -1;
         }
 
-       down(&cont_inode->i_sem);
+       fs_down(&cont_inode->i_sem);
         result = cont_file.f_op->write(&cont_file , buff, count, 
                                       &(cont_file.f_pos));
-       up(&cont_inode->i_sem);
+       fs_up(&cont_inode->i_sem);
         coda_restore_codafile(coda_inode, coda_file, cont_inode, &cont_file);
        
        if (result)
@@ -228,14 +228,14 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry)
         coda_prepare_openfile(coda_inode, coda_file, cont_inode, 
                              &cont_file, &cont_dentry);
 
-       down(&cont_inode->i_sem);
+       fs_down(&cont_inode->i_sem);
 
         result = file_fsync(&cont_file ,&cont_dentry);
        if ( result == 0 ) {
                result = venus_fsync(coda_inode->i_sb, &(cnp->c_fid));
        }
 
-       up(&cont_inode->i_sem);
+       fs_up(&cont_inode->i_sem);
 
         coda_restore_codafile(coda_inode, coda_file, cont_inode, &cont_file);
         return result;
index 0430bb0fdb8428c94eef184fee5b929fc06344b0..e4265a5ce0534fbf3987b4b62fcc4d72ebb1c228 100644 (file)
@@ -475,9 +475,9 @@ void shrink_dcache_parent(struct dentry * parent)
  */
 void shrink_dcache_memory(int priority, unsigned int gfp_mask)
 {
-       if (gfp_mask & __GFP_IO) {
+       if (gfp_mask & __GFP_IO && !current->fs_locks) {
                int count = 0;
-               if (priority)
+               if (priority > 1)
                        count = dentry_stat.nr_unused / priority;
                prune_dcache(count, -1);
        }
index 7a9fa444e48f28cd9d033f3fca1326aa072f8d75..9f9354e971960f36b3d871b4c37434d8a0aa19cf 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -73,7 +73,7 @@ int do_truncate(struct dentry *dentry, unsigned long length)
        if ((off_t) length < 0)
                return -EINVAL;
 
-       down(&inode->i_sem);
+       fs_down(&inode->i_sem);
        newattrs.ia_size = length;
        newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
        error = notify_change(dentry, &newattrs);
@@ -83,7 +83,7 @@ int do_truncate(struct dentry *dentry, unsigned long length)
                if (inode->i_op && inode->i_op->truncate)
                        inode->i_op->truncate(inode);
        }
-       up(&inode->i_sem);
+       fs_up(&inode->i_sem);
        return error;
 }
 
index e2b5b789977b5c3ee1269f908812253af3c26b75..56c4fa41125ffc58639712c2fe36598024f7c703 100644 (file)
@@ -166,9 +166,9 @@ asmlinkage ssize_t sys_write(unsigned int fd, const char * buf, size_t count)
        if (!file->f_op || !(write = file->f_op->write))
                goto out;
 
-       down(&inode->i_sem);
+       fs_down(&inode->i_sem);
        ret = write(file, buf, count, &file->f_pos);
-       up(&inode->i_sem);
+       fs_up(&inode->i_sem);
 out:
        fput(file);
 bad_file:
@@ -314,9 +314,9 @@ asmlinkage ssize_t sys_writev(unsigned long fd, const struct iovec * vector,
        if (!file)
                goto bad_file;
        if (file->f_op && file->f_op->write && (file->f_mode & FMODE_WRITE)) {
-               down(&file->f_dentry->d_inode->i_sem);
+               fs_down(&file->f_dentry->d_inode->i_sem);
                ret = do_readv_writev(VERIFY_READ, file, vector, count);
-               up(&file->f_dentry->d_inode->i_sem);
+               fs_up(&file->f_dentry->d_inode->i_sem);
        }
        fput(file);
 
@@ -386,9 +386,9 @@ asmlinkage ssize_t sys_pwrite(unsigned int fd, const char * buf,
        if (pos < 0)
                goto out;
 
-       down(&file->f_dentry->d_inode->i_sem);
+       fs_down(&file->f_dentry->d_inode->i_sem);
        ret = write(file, buf, count, &pos);
-       up(&file->f_dentry->d_inode->i_sem);
+       fs_up(&file->f_dentry->d_inode->i_sem);
 
 out:
        fput(file);
index e2d39ed8182d01c7d096fa5487394bcb58c1191c..b7a722c93cdc0df7f4e5ec5186124e2387b19460 100644 (file)
@@ -190,6 +190,7 @@ typedef char buffer_block[BLOCK_SIZE];
 #define BH_Lock                2       /* 1 if the buffer is locked */
 #define BH_Req         3       /* 0 if the buffer has been invalidated */
 #define BH_Protected   6       /* 1 if the buffer is protected */
+#define BH_Wait_IO     7       /* 1 if we should throttle on this buffer */
 
 /*
  * Try to keep the most commonly used fields in single cache lines (16
@@ -782,7 +783,7 @@ extern struct file *inuse_filps;
 
 extern void refile_buffer(struct buffer_head * buf);
 extern void set_writetime(struct buffer_head * buf, int flag);
-extern int try_to_free_buffers(struct page *, int wait);
+extern int try_to_free_buffers(struct page *, int);
 
 extern int nr_buffers;
 extern long buffermem;
@@ -791,15 +792,25 @@ extern int nr_buffer_heads;
 #define BUF_CLEAN      0
 #define BUF_LOCKED     1       /* Buffers scheduled for write */
 #define BUF_DIRTY      2       /* Dirty buffers, not yet scheduled for write */
-#define NR_LIST                3
+#define BUF_PROTECTED  3       /* Ramdisk persistent storage */
+#define NR_LIST                4
 
 void mark_buffer_uptodate(struct buffer_head * bh, int on);
 
+extern inline void mark_buffer_protected(struct buffer_head * bh)
+{
+       if (!test_and_set_bit(BH_Protected, &bh->b_state)) {
+               if (bh->b_list != BUF_PROTECTED)
+                       refile_buffer(bh);
+       }
+}
+
 extern inline void mark_buffer_clean(struct buffer_head * bh)
 {
        if (test_and_clear_bit(BH_Dirty, &bh->b_state)) {
                if (bh->b_list == BUF_DIRTY)
                        refile_buffer(bh);
+               clear_bit(BH_Wait_IO, &bh->b_state);
        }
 }
 
@@ -941,6 +952,9 @@ extern void inode_setattr(struct inode *, struct iattr *);
 
 extern __u32 inode_generation_count;
 
+#define fs_down(sem)   do { current->fs_locks++; down(sem); } while (0)
+#define fs_up(sem)     do { up(sem); current->fs_locks--; } while (0)
+
 #endif /* __KERNEL__ */
 
 #endif
index 2094a4d19f7938ed95fd132f5d4c3bfe663ef077..f92fa3788c31c7209d38cdca1486803a3bc78d69 100644 (file)
@@ -50,10 +50,12 @@ extern inline void lock_super(struct super_block * sb)
        if (sb->s_lock)
                __wait_on_super(sb);
        sb->s_lock = 1;
+       current->fs_locks++;
 }
 
 extern inline void unlock_super(struct super_block * sb)
 {
+       current->fs_locks--;
        sb->s_lock = 0;
        wake_up(&sb->s_wait);
 }
index 84c587aca99c322c571e8ae708e36f09e1e89a79..ad89e46aa2f0390b0a7c43c93b6f3e890c17f508 100644 (file)
@@ -129,12 +129,8 @@ typedef struct page {
        struct wait_queue *wait;
        struct page **pprev_hash;
        struct buffer_head * buffers;
-       int age;
 } mem_map_t;
 
-#define PAGE_AGE_INITIAL 1     /* age for pages just mapped */
-#define PAGE_AGE_YOUNG 2       /* age for pages recently referenced */
-
 /* Page flag bit values */
 #define PG_locked               0
 #define PG_error                1
index 82b10f6ec1d60ba76b81cf76eacea13a5ed51414..fdecb4207d5c19834bea801aaa4861616bcc5cd0 100644 (file)
@@ -319,6 +319,8 @@ struct task_struct {
        struct files_struct *files;
 /* memory management info */
        struct mm_struct *mm;
+       struct list_head local_pages; int allocation_order, nr_local_pages;
+       int fs_locks;
 
 /* signal handlers */
        spinlock_t sigmask_lock;        /* Protects signal and blocked */
@@ -351,6 +353,7 @@ struct task_struct {
 #define PF_SIGNALED    0x00000400      /* killed by a signal */
 #define PF_MEMALLOC    0x00000800      /* Allocating memory */
 #define PF_VFORK       0x00001000      /* Wake up parent in mm_release */
+#define PF_FREE_PAGES  0x00002000      /* The current-> */
 
 #define PF_USEDFPU     0x00100000      /* task used FPU this quantum (SMP) */
 #define PF_DTRACE      0x00200000      /* delayed trace (used on m68k, i386) */
@@ -400,7 +403,7 @@ struct task_struct {
 /* tss */      INIT_TSS, \
 /* fs */       &init_fs, \
 /* files */    &init_files, \
-/* mm */       &init_mm, \
+/* mm */       &init_mm, { &init_task.local_pages, &init_task.local_pages}, 0, 0, 0, \
 /* signals */  SPIN_LOCK_UNLOCKED, &init_signals, {{0}}, {{0}}, NULL, &init_task.sigqueue, 0, 0, \
 /* exec cts */ 0,0, \
 /* oom */      0, \
index 5a7fe3c8b7bda4cf4a3b5e8143879df333339df3..2f4f06e2b733df234d6961aa9bfefd93538a3781 100644 (file)
@@ -80,7 +80,6 @@ static int init(void *);
 extern int bdflush(void *);
 extern int kupdate(void *);
 extern int kswapd(void *);
-extern int kpiod(void *);
 extern void kswapd_setup(void);
 extern unsigned long init_IRQ( unsigned long);
 extern void init_modules(void);
@@ -1584,7 +1583,6 @@ static void __init do_basic_setup(void)
        kernel_thread(kupdate, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
        /* Start the background pageout daemon. */
        kswapd_setup();
-       kernel_thread(kpiod, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
        kernel_thread(kswapd, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
 
 #if CONFIG_AP1000
index 62dd3c64599479ea529baa4e3cbd78d16c5b5711..4a33fa8fe85060781de7070a5f22bd75a6be7e3a 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -679,7 +679,7 @@ done:       /* pte_val(pte) == shp->shm_pages[idx] */
 }
 
 /*
- * Goes through counter = (shm_rss >> prio) present shm pages.
+ * Goes through counter = (shm_rss / prio) present shm pages.
  */
 static unsigned long swap_id = 0; /* currently being swapped */
 static unsigned long swap_idx = 0; /* next to swap */
@@ -693,7 +693,7 @@ int shm_swap (int prio, int gfp_mask)
        int loop = 0;
        int counter;
        
-       counter = shm_rss >> prio;
+       counter = shm_rss / prio;
        if (!counter || !(swap_nr = get_swap_page()))
                return 0;
 
index 70309a73c7cfa55273fa5d470db7284113339fad..70a98bb198c62ef20c30955537a9f036f6cae2e7 100644 (file)
@@ -665,6 +665,8 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
        p->lock_depth = -1;             /* -1 = no lock */
        p->start_time = jiffies;
 
+       INIT_LIST_HEAD(&p->local_pages);
+
        retval = -ENOMEM;
        /* copy all the process information */
        if (copy_files(clone_flags, p))
index 384bab05a42efef3d3481a24c5b126bc84210125..785135056c64d29a236dcefb578fbaa3a714814b 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/blkdev.h>
 #include <linux/file.h>
 #include <linux/swapctl.h>
-#include <linux/slab.h>
 #include <linux/init.h>
 
 #include <asm/pgtable.h>
@@ -36,25 +35,6 @@ unsigned long page_cache_size = 0;
 unsigned int page_hash_bits, page_hash_mask;
 struct page **page_hash_table;
 
-/* 
- * Define a request structure for outstanding page write requests
- * to the background page io daemon
- */
-
-struct pio_request 
-{
-       struct pio_request *    next;
-       struct file *           file;
-       unsigned long           offset;
-       unsigned long           page;
-};
-static struct pio_request *pio_first = NULL, **pio_last = &pio_first;
-static kmem_cache_t *pio_request_cache;
-static struct wait_queue *pio_wait = NULL;
-
-static inline void 
-make_pio_request(struct file *, unsigned long, unsigned long);
-
 static inline int sync_page(struct page *page)
 {
        struct inode *inode = page->inode;
@@ -150,14 +130,21 @@ int shrink_mmap(int priority, int gfp_mask)
        unsigned long limit = num_physpages;
        struct page * page;
        int count;
-       int nr_dirty = 0;
-       
+
        /* Make sure we scan all pages twice at priority 0. */
-       count = (limit << 1) >> priority;
+       count = limit / priority;
 
  refresh_clock:
        page = mem_map + clock;
        do {
+               int referenced;
+
+               if (current->need_resched) {
+                       current->state = TASK_RUNNING;
+                       schedule();
+                       goto refresh_clock;
+               }
+               
                /* This works even in the presence of PageSkip because
                 * the first two entries at the beginning of a hole will
                 * be marked, not just the first.
@@ -174,42 +161,39 @@ int shrink_mmap(int priority, int gfp_mask)
                        clock = page - mem_map;
                }
                
-               if (test_and_clear_bit(PG_referenced, &page->flags)) {
-                       page->age = PAGE_AGE_YOUNG;
-                       continue;
-               }
-
-               if (page->age > 0) {
-                       page->age--;
-                       continue;
-               }
+               count--;
 
                /* We can't free pages unless there's just one user */
                if (atomic_read(&page->count) != 1)
                        continue;
 
+               referenced = test_and_clear_bit(PG_referenced, &page->flags);
+
                if (PageLocked(page))
                        continue;
 
-               if ((gfp_mask & __GFP_DMA) && !PageDMA(page))
+               if ((gfp_mask & __GFP_DMA) && !PageDMA(page)) {
+                       count++;
                        continue;
+               }
 
-               /* Is it a page swap page? Drop it, its old. */
+               /*
+                * Is it a page swap page? If so, we want to
+                * drop it if it is no longer used, even if it
+                * were to be marked referenced..
+                */
                if (PageSwapCache(page)) {
+                       if (referenced && swap_count(page->offset) != 1)
+                               continue;
                        delete_from_swap_cache(page);
                        return 1;
                }       
 
+               if (referenced)
+                       continue;
+
                /* Is it a buffer page? */
                if (page->buffers) {
-                       /*
-                        * Wait for async IO to complete
-                        * at each 64 buffers
-                        */ 
-
-                       int wait = ((gfp_mask & __GFP_IO) 
-                               && (!(nr_dirty++ % 64)));
-
                        if (buffer_under_min())
                                continue;
                        /*
@@ -217,10 +201,8 @@ int shrink_mmap(int priority, int gfp_mask)
                         * throttling.
                         */
 
-                       if (!try_to_free_buffers(page, wait)) { 
-                               if(--count < 0) break;
+                       if (!try_to_free_buffers(page, gfp_mask))
                                goto refresh_clock;
-                       }
                        return 1;
                }
 
@@ -231,8 +213,7 @@ int shrink_mmap(int priority, int gfp_mask)
                        remove_inode_page(page);
                        return 1;
                }
-
-       } while (--count > 0);
+       } while (count > 0);
        return 0;
 }
 
@@ -299,7 +280,7 @@ static inline void add_to_page_cache(struct page * page,
        struct page **hash)
 {
        atomic_inc(&page->count);
-       page->flags = (page->flags & ~((1 << PG_uptodate) | (1 << PG_error))) | (1 << PG_referenced);
+       page->flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error) | (1 << PG_referenced));
        page->offset = offset;
        add_page_to_inode_queue(inode, page);
        __add_page_to_hash_queue(page, hash);
@@ -878,12 +859,12 @@ static int file_send_actor(read_descriptor_t * desc, const char *area, unsigned
 
        if (size > count)
                size = count;
-       down(&inode->i_sem);
+       fs_down(&inode->i_sem);
        old_fs = get_fs();
        set_fs(KERNEL_DS);
        written = file->f_op->write(file, area, size, &file->f_pos);
        set_fs(old_fs);
-       up(&inode->i_sem);
+       fs_up(&inode->i_sem);
        if (written < 0) {
                desc->error = written;
                written = 0;
@@ -1160,8 +1141,7 @@ static inline int do_write_page(struct inode * inode, struct file * file,
 
 static int filemap_write_page(struct vm_area_struct * vma,
                              unsigned long offset,
-                             unsigned long page,
-                             int wait)
+                             unsigned long page)
 {
        int result;
        struct file * file;
@@ -1179,20 +1159,9 @@ static int filemap_write_page(struct vm_area_struct * vma,
         * and file could be released ... increment the count to be safe.
         */
        file->f_count++;
-
-       /* 
-        * If this is a swapping operation rather than msync(), then
-        * leave the actual IO, and the restoration of the file count,
-        * to the kpiod thread.  Just queue the request for now.
-        */
-       if (!wait) {
-               make_pio_request(file, offset, page);
-               return 0;
-       }
-       
-       down(&inode->i_sem);
+       fs_down(&inode->i_sem);
        result = do_write_page(inode, file, (const char *) page, offset);
-       up(&inode->i_sem);
+       fs_up(&inode->i_sem);
        fput(file);
        return result;
 }
@@ -1205,7 +1174,7 @@ static int filemap_write_page(struct vm_area_struct * vma,
  */
 int filemap_swapout(struct vm_area_struct * vma, struct page * page)
 {
-       return filemap_write_page(vma, page->offset, page_address(page), 0);
+       return filemap_write_page(vma, page->offset, page_address(page));
 }
 
 static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
@@ -1242,7 +1211,7 @@ static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
                        return 0;
                }
        }
-       error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page, 1);
+       error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
        page_cache_free(page);
        return error;
 }
@@ -1414,9 +1383,9 @@ static int msync_interval(struct vm_area_struct * vma,
                        if (file) {
                                struct dentry * dentry = file->f_dentry;
                                struct inode * inode = dentry->d_inode;
-                               down(&inode->i_sem);
+                               fs_down(&inode->i_sem);
                                error = file_fsync(file, dentry);
-                               up(&inode->i_sem);
+                               fs_up(&inode->i_sem);
                        }
                }
                return error;
@@ -1745,130 +1714,6 @@ void put_cached_page(unsigned long addr)
        page_cache_release(page);
 }
 
-
-/* Add request for page IO to the queue */
-
-static inline void put_pio_request(struct pio_request *p)
-{
-       *pio_last = p;
-       p->next = NULL;
-       pio_last = &p->next;
-}
-
-/* Take the first page IO request off the queue */
-
-static inline struct pio_request * get_pio_request(void)
-{
-       struct pio_request * p = pio_first;
-       pio_first = p->next;
-       if (!pio_first)
-               pio_last = &pio_first;
-       return p;
-}
-
-/* Make a new page IO request and queue it to the kpiod thread */
-
-static inline void make_pio_request(struct file *file,
-                                   unsigned long offset,
-                                   unsigned long page)
-{
-       struct pio_request *p;
-
-       atomic_inc(&page_cache_entry(page)->count);
-
-       /* 
-        * We need to allocate without causing any recursive IO in the
-        * current thread's context.  We might currently be swapping out
-        * as a result of an allocation made while holding a critical
-        * filesystem lock.  To avoid deadlock, we *MUST* not reenter
-        * the filesystem in this thread.
-        *
-        * We can wait for kswapd to free memory, or we can try to free
-        * pages without actually performing further IO, without fear of
-        * deadlock.  --sct
-        */
-
-       while ((p = kmem_cache_alloc(pio_request_cache, GFP_BUFFER)) == NULL) {
-               if (try_to_free_pages(__GFP_WAIT))
-                       continue;
-               current->state = TASK_INTERRUPTIBLE;
-               schedule_timeout(HZ/10);
-       }
-       
-       p->file   = file;
-       p->offset = offset;
-       p->page   = page;
-
-       put_pio_request(p);
-       wake_up(&pio_wait);
-}
-
-
-/*
- * This is the only thread which is allowed to write out filemap pages
- * while swapping.
- * 
- * To avoid deadlock, it is important that we never reenter this thread.
- * Although recursive memory allocations within this thread may result
- * in more page swapping, that swapping will always be done by queuing
- * another IO request to the same thread: we will never actually start
- * that IO request until we have finished with the current one, and so
- * we will not deadlock.  
- */
-
-int kpiod(void * unused)
-{
-       struct task_struct *tsk = current;
-       struct wait_queue wait = { tsk, };
-       struct inode * inode;
-       struct dentry * dentry;
-       struct pio_request * p;
-       
-       tsk->session = 1;
-       tsk->pgrp = 1;
-       strcpy(tsk->comm, "kpiod");
-       sigfillset(&tsk->blocked);
-       init_waitqueue(&pio_wait);
-       /*
-        * Mark this task as a memory allocator - we don't want to get caught
-        * up in the regular mm freeing frenzy if we have to allocate memory
-        * in order to write stuff out.
-        */
-       tsk->flags |= PF_MEMALLOC;
-
-       lock_kernel();
-       
-       pio_request_cache = kmem_cache_create("pio_request", 
-                                             sizeof(struct pio_request),
-                                             0, SLAB_HWCACHE_ALIGN, 
-                                             NULL, NULL);
-       if (!pio_request_cache)
-               panic ("Could not create pio_request slab cache");
-
-       while (1) {
-               tsk->state = TASK_INTERRUPTIBLE;
-               add_wait_queue(&pio_wait, &wait);
-               if (!pio_first)
-                       schedule();
-               remove_wait_queue(&pio_wait, &wait);
-               tsk->state = TASK_RUNNING;
-
-               while (pio_first) {
-                       p = get_pio_request();
-                       dentry = p->file->f_dentry;
-                       inode = dentry->d_inode;
-                       
-                       down(&inode->i_sem);
-                       do_write_page(inode, p->file,
-                                     (const char *) p->page, p->offset);
-                       up(&inode->i_sem);
-                       fput(p->file);
-                       page_cache_free(p->page);
-                       kmem_cache_free(pio_request_cache, p);
-               }
-       }
-}
-
 void __init page_cache_init(unsigned long memory_size)
 {
        unsigned long htable_size;
index 8212c29bb780bca48e7c3e287f1d1e9e519acea0..533cca3ad461d2c4846b3bb8cc4e4db487bbbdf7 100644 (file)
@@ -93,34 +93,69 @@ static inline void remove_mem_queue(struct page * entry)
  */
 spinlock_t page_alloc_lock = SPIN_LOCK_UNLOCKED;
 
+#define list(x) (mem_map+(x))
+#define __free_pages_ok(map_nr, mask, area, index)             \
+       nr_free_pages -= (mask);                                \
+       while ((mask) + (1 << (NR_MEM_LISTS-1))) {              \
+               if (!test_and_change_bit((index), (area)->map)) \
+                       break;                                  \
+               (area)->count--;                                \
+               remove_mem_queue(list((map_nr) ^ -(mask)));     \
+               (mask) <<= 1;                                   \
+               (area)++;                                       \
+               (index) >>= 1;                                  \
+               (map_nr) &= (mask);                             \
+       }                                                       \
+       add_mem_queue(area, list(map_nr));
+
+static void free_local_pages(struct page * page) {
+       unsigned long order = page->offset;
+       unsigned int type = PageDMA(page) ? 1 : 0;
+       struct free_area_struct *area;
+       unsigned long map_nr = page - mem_map;
+       unsigned long mask = (~0UL) << order;
+       unsigned long index = map_nr >> (1 + order);
+
+       area = free_area[type] + order;
+       __free_pages_ok(map_nr, mask, area, index);
+}
+
 static inline void free_pages_ok(unsigned long map_nr, unsigned long order, unsigned type)
 {
-       struct free_area_struct *area = free_area[type] + order;
-       unsigned long index = map_nr >> (1 + order);
-       unsigned long mask = (~0UL) << order;
+       struct free_area_struct *area;
+       unsigned long index;
+       unsigned long mask;
        unsigned long flags;
+       struct page * page;
 
-       spin_lock_irqsave(&page_alloc_lock, flags);
-
-#define list(x) (mem_map+(x))
+       if (current->flags & PF_FREE_PAGES)
+               goto local_freelist;
+ back_local_freelist:
 
+       index = map_nr >> (1 + order);
+       mask = (~0UL) << order;
        map_nr &= mask;
-       nr_free_pages -= mask;
-       while (mask + (1 << (NR_MEM_LISTS-1))) {
-               if (!test_and_change_bit(index, area->map))
-                       break;
-               area->count--;
-               remove_mem_queue(list(map_nr ^ -mask));
-               mask <<= 1;
-               area++;
-               index >>= 1;
-               map_nr &= mask;
-       }
-       add_mem_queue(area, list(map_nr));
-
-#undef list
 
+       spin_lock_irqsave(&page_alloc_lock, flags);
+       area = free_area[type] + order;
+       __free_pages_ok(map_nr, mask, area, index);
        spin_unlock_irqrestore(&page_alloc_lock, flags);
+       return;
+
+ local_freelist:
+       /*
+        * This is a little subtle: if the allocation order
+        * wanted is major than zero we'd better take all the pages
+        * local since we must deal with fragmentation too and we
+        * can't rely on the nr_local_pages information.
+        */
+       if (current->nr_local_pages && !current->allocation_order)
+               goto back_local_freelist;
+
+       page = mem_map + map_nr;
+       list_add((struct list_head *) page, &current->local_pages);
+       page->offset = order;
+       current->nr_local_pages++;
 }
 
 void __free_pages(struct page *page, unsigned long order)
@@ -129,7 +164,6 @@ void __free_pages(struct page *page, unsigned long order)
                if (PageSwapCache(page))
                        panic ("Freeing swap cache page");
                page->flags &= ~(1 << PG_referenced);
-               page->age = PAGE_AGE_INITIAL;
                free_pages_ok(page - mem_map, order, PageDMA(page) ? 1 : 0);
                return;
        }
@@ -180,13 +214,32 @@ do { unsigned long size = 1 << high; \
        atomic_set(&map->count, 1); \
 } while (0)
 
+static void refile_local_pages(void)
+{
+       if (current->nr_local_pages) {
+               struct page * page;
+               struct list_head * entry;
+               int nr_pages = current->nr_local_pages;
+
+               while ((entry = current->local_pages.next) != &current->local_pages) {
+                       list_del(entry);
+                       page = (struct page *) entry;
+                       free_local_pages(page);
+                       if (!nr_pages--)
+                               panic("__get_free_pages local_pages list corrupted I");
+               }
+               if (nr_pages)
+                       panic("__get_free_pages local_pages list corrupted II");
+               current->nr_local_pages = 0;
+       }
+}
+
 unsigned long __get_free_pages(int gfp_mask, unsigned long order)
 {
        unsigned long flags;
-       static atomic_t free_before_allocate = ATOMIC_INIT(0);
 
        if (order >= NR_MEM_LISTS)
-               goto nopage;
+               goto out;
 
 #ifdef ATOMIC_MEMORY_DEBUGGING
        if ((gfp_mask & __GFP_WAIT) && in_interrupt()) {
@@ -195,26 +248,24 @@ unsigned long __get_free_pages(int gfp_mask, unsigned long order)
                        printk("gfp called nonatomically from interrupt %p\n",
                                __builtin_return_address(0));
                }
-               goto nopage;
+               goto out;
        }
 #endif
 
+       /*
+        * Acquire lock before reading nr_free_pages to make sure it
+        * won't change from under us.
+        */
+       spin_lock_irqsave(&page_alloc_lock, flags);
+
        /*
         * If this is a recursive call, we'd better
         * do our best to just allocate things without
         * further thought.
         */
        if (!(current->flags & PF_MEMALLOC)) {
-               int freed;
                extern struct wait_queue * kswapd_wait;
 
-               /* Somebody needs to free pages so we free some of our own. */
-               if (atomic_read(&free_before_allocate)) {
-                       current->flags |= PF_MEMALLOC;
-                       try_to_free_pages(gfp_mask);
-                       current->flags &= ~PF_MEMALLOC;
-               }
-
                if (nr_free_pages > freepages.low)
                        goto ok_to_allocate;
 
@@ -224,35 +275,44 @@ unsigned long __get_free_pages(int gfp_mask, unsigned long order)
                /* Do we have to block or can we proceed? */
                if (nr_free_pages > freepages.min)
                        goto ok_to_allocate;
-
-               current->flags |= PF_MEMALLOC;
-               atomic_inc(&free_before_allocate);
-               freed = try_to_free_pages(gfp_mask);
-               atomic_dec(&free_before_allocate);
-               current->flags &= ~PF_MEMALLOC;
-
-               /*
-                * Re-check we're still low on memory after we blocked
-                * for some time. Somebody may have released lots of
-                * memory from under us while we was trying to free
-                * the pages. We check against pages_high to be sure
-                * to succeed only if lots of memory is been released.
-                */
-               if (nr_free_pages > freepages.high)
-                       goto ok_to_allocate;
-
-               if (!freed && !(gfp_mask & (__GFP_MED | __GFP_HIGH)))
-                       goto nopage;
+               if (gfp_mask & __GFP_WAIT) {
+                       int freed;
+                       /*
+                        * If the task is ok to sleep it's fine also
+                        * if we release irq here.
+                        */
+                       spin_unlock_irq(&page_alloc_lock);
+
+                       current->flags |= PF_MEMALLOC|PF_FREE_PAGES;
+                       current->allocation_order = order;
+                       freed = try_to_free_pages(gfp_mask);
+                       current->flags &= ~(PF_MEMALLOC|PF_FREE_PAGES);
+
+                       spin_lock_irq(&page_alloc_lock);
+                       refile_local_pages();
+
+                       /*
+                        * Re-check we're still low on memory after we blocked
+                        * for some time. Somebody may have released lots of
+                        * memory from under us while we was trying to free
+                        * the pages. We check against pages_high to be sure
+                        * to succeed only if lots of memory is been released.
+                        */
+                       if (nr_free_pages > freepages.high)
+                               goto ok_to_allocate;
+
+                       if (!freed && !(gfp_mask & (__GFP_MED | __GFP_HIGH)))
+                               goto nopage;
+               }
        }
 ok_to_allocate:
-       spin_lock_irqsave(&page_alloc_lock, flags);
        /* if it's not a dma request, try non-dma first */
        if (!(gfp_mask & __GFP_DMA))
                RMQUEUE_TYPE(order, 0);
        RMQUEUE_TYPE(order, 1);
+ nopage:
        spin_unlock_irqrestore(&page_alloc_lock, flags);
-
-nopage:
+ out:
        return 0;
 }
 
index 8c5e7176c5aa668ccfc4ad0997af4f47380386ad..7c27e5b338db776516948ade0884b1373d641600 100644 (file)
@@ -63,6 +63,7 @@ int add_to_swap_cache(struct page *page, unsigned long entry)
                return 0;
        }
        atomic_inc(&page->count);
+       page->flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error) | (1 << PG_referenced));
        page->inode = &swapper_inode;
        page->offset = entry;
        add_page_to_hash_queue(page, &swapper_inode, entry);
index 86e6b1fe9a87e76a27bab5e9385b0edf5ae3c7bd..81ba9a55ba624c55c99ad91106c4668e159f9d5c 100644 (file)
@@ -96,6 +96,9 @@ drop_pte:
         * some real work in the future in "shrink_mmap()".
         */
        if (!pte_dirty(pte)) {
+               if (page_map->inode && pgcache_under_min())
+                       /* unmapping this page would be useless */
+                       return 0;
                flush_cache_page(vma, address);
                pte_clear(page_table);
                goto drop_pte;
@@ -106,7 +109,7 @@ drop_pte:
         * we cannot do I/O! Avoid recursing on FS
         * locks etc.
         */
-       if (!(gfp_mask & __GFP_IO))
+       if (!(gfp_mask & __GFP_IO) || current->fs_locks)
                return 0;
 
        /*
@@ -208,6 +211,8 @@ static inline int swap_out_pmd(struct task_struct * tsk, struct vm_area_struct *
                result = try_to_swap_out(tsk, vma, address, pte, gfp_mask);
                if (result)
                        return result;
+               if (current->need_resched)
+                       return 2;
                address += PAGE_SIZE;
                pte++;
        } while (address < end);
@@ -327,7 +332,7 @@ static int swap_out(unsigned int priority, int gfp_mask)
         * Think of swap_cnt as a "shadow rss" - it tells us which process
         * we want to page out (always try largest first).
         */
-       counter = nr_tasks / (priority+1);
+       counter = nr_tasks / priority;
        if (counter < 1)
                counter = 1;
 
@@ -361,8 +366,13 @@ static int swap_out(unsigned int priority, int gfp_mask)
                        goto out;
                }
 
-               if (swap_out_process(pbest, gfp_mask))
+               switch (swap_out_process(pbest, gfp_mask)) {
+               case 1:
                        return 1;
+               case 2:
+                       current->state = TASK_RUNNING;
+                       schedule();
+               }
        }
 out:
        return 0;
@@ -377,11 +387,9 @@ out:
  * cluster them so that we get good swap-out behaviour. See
  * the "free_memory()" macro for details.
  */
-static int do_try_to_free_pages(unsigned int gfp_mask)
+int try_to_free_pages(unsigned int gfp_mask)
 {
        int priority;
-       int ret = 0;
-       int swapcount;
        int count = SWAP_CLUSTER_MAX;
 
        lock_kernel();
@@ -389,41 +397,34 @@ static int do_try_to_free_pages(unsigned int gfp_mask)
        /* Always trim SLAB caches when memory gets low. */
        kmem_cache_reap(gfp_mask);
 
-       priority = 6;
+       priority = 5;
        do {
                while (shrink_mmap(priority, gfp_mask)) {
-                       ret = 1;
                        if (!--count)
                                goto done;
                }
 
                /* Try to get rid of some shared memory pages.. */
-               if (gfp_mask & __GFP_IO) {
+               if (gfp_mask & __GFP_IO && !current->fs_locks) {
                        while (shm_swap(priority, gfp_mask)) {
-                               ret = 1;
                                if (!--count)
                                        goto done;
                        }
                }
 
                /* Then, try to page stuff out.. */
-               swapcount = count;
                while (swap_out(priority, gfp_mask)) {
-                       ret = 1;
-                       if (!--swapcount)
-                               break;
+                       if (!--count)
+                               goto done;
                }
 
                shrink_dcache_memory(priority, gfp_mask);
-       } while (--priority >= 0);
+       } while (--priority > 0);
 done:
        unlock_kernel();
 
-       if (!ret)
-               printk("VM: do_try_to_free_pages failed for %s...\n",
-                               current->comm);
        /* Return success if we freed a page. */
-       return ret;
+       return priority > 0;
 }
 
 /*
@@ -499,7 +500,7 @@ int kswapd(void *unused)
 
                while (nr_free_pages < freepages.high)
                {
-                       if (do_try_to_free_pages(GFP_KSWAPD))
+                       if (try_to_free_pages(GFP_KSWAPD))
                        {
                                if (tsk->need_resched)
                                        schedule();
@@ -510,17 +511,3 @@ int kswapd(void *unused)
                }
        }
 }
-
-/*
- * Called by non-kswapd processes when kswapd really cannot
- * keep up with the demand for free memory.
- */
-int try_to_free_pages(unsigned int gfp_mask)
-{
-       int retval = 1;
-
-       if (gfp_mask & __GFP_WAIT)
-               retval = do_try_to_free_pages(gfp_mask);
-       return retval;
-}
-