Import 2.3.7pre3

author Linus Torvalds <torvalds@linuxfoundation.org>

Fri, 23 Nov 2007 20:25:28 +0000 (15:25 -0500)

committer Linus Torvalds <torvalds@linuxfoundation.org>

Fri, 23 Nov 2007 20:25:28 +0000 (15:25 -0500)
author Linus Torvalds <torvalds@linuxfoundation.org>
Fri, 23 Nov 2007 20:25:28 +0000 (15:25 -0500)
committer Linus Torvalds <torvalds@linuxfoundation.org>
Fri, 23 Nov 2007 20:25:28 +0000 (15:25 -0500)
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c

index 3e11bf7d60d56d2c0f13ccc1840d84db27904e55..33497880186c98d8290b774b368e5577bee097f8 100644 (file)
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -651,9 +651,7 @@ static inline ssize_t do_tty_write(
         ssize_t ret = 0, written = 0;
         struct inode *inode = file->f_dentry->d_inode;
         
-       up(&inode->i_sem);
-       if (down_interruptible(&inode->i_atomic_write)) {
-               down(&inode->i_sem);
+       if (down_interruptible(&inode->i_sem)) {
                 return -ERESTARTSYS;
         }
         for (;;) {
@@ -678,8 +676,7 @@ static inline ssize_t do_tty_write(
                 file->f_dentry->d_inode->i_mtime = CURRENT_TIME;
                 ret = written;
         }
-       up(&inode->i_atomic_write);
-       down(&inode->i_sem);
+       up(&inode->i_sem);
         return ret;
  }
  
diff --git a/drivers/usb/uhci.c b/drivers/usb/uhci.c

index 73aab5fa10132a4fa9ed5e75ce4dbb764df81ddf..2f8010ed1c902aff1554865a800a8a3d881669c3 100644 (file)
--- a/drivers/usb/uhci.c
+++ b/drivers/usb/uhci.c
@@ -1264,7 +1264,8 @@ static void uhci_interrupt_notify(struct uhci *uhci)
                                         struct uhci_qh *interrupt_qh = td->qh;
  
                                         usb_dotoggle(td->dev, usb_pipeendpoint(td->info));
-                                       td->info |= 1 << 19; /* toggle between data0 and data1 */
+                                       td->info &= ~(1 << 19); /* clear data toggle */
+                                       td->info |= usb_gettoggle(td->dev, usb_pipeendpoint(td->info)) << 19; /* toggle between data0 and data1 */
                                         td->status = (td->status & 0x2f000000) | (1 << 23) | (1 << 24); /* active */
  
                                         /* Remove then readd? Is that necessary */
diff --git a/fs/buffer.c b/fs/buffer.c

index 9ea490ef43a51a730566d7e1112928424f7709f3..0f5302a7d2bc6a2c8d8ee7796da9fbc647500036 100644 (file)
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -109,7 +109,7 @@ union bdflush_param {
                 int dummy3;    /* unused */
         } b_un;
         unsigned int data[N_PARAM];
-} bdf_prm = {{90, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
+} bdf_prm = {{100, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
  
  /* These are the min and max parameter values that we will allow to be assigned */
  int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  0,   1*HZ,   1*HZ, 1, 1};
@@ -422,6 +422,24 @@ void invalidate_buffers(kdev_t dev)
  #define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block)) & bh_hash_mask)
  #define hash(dev,block) hash_table[_hashfn(dev,block)]
  
+static void insert_into_hash_list(struct buffer_head * bh)
+{
+       bh->b_next = NULL;
+       bh->b_pprev = NULL;
+       if (bh->b_dev) {
+               struct buffer_head **bhp = &hash(bh->b_dev, bh->b_blocknr);
+               struct buffer_head *next = *bhp;
+
+               if (next) {
+                       bh->b_next = next;
+                       next->b_pprev = &bh->b_next;
+               }
+               *bhp = bh;
+               bh->b_pprev = bhp;
+               nr_hashed_buffers++;
+       }
+}
+
  static inline void remove_from_hash_queue(struct buffer_head * bh)
  {
         struct buffer_head **pprev = bh->b_pprev;
@@ -433,16 +451,36 @@ static inline void remove_from_hash_queue(struct buffer_head * bh)
                 }
                 *pprev = next;
                 bh->b_pprev = NULL;
+               nr_hashed_buffers--;
         }
-       nr_hashed_buffers--;
  }
  
-static inline void remove_from_lru_list(struct buffer_head * bh)
+static void insert_into_lru_list(struct buffer_head * bh)
  {
-       if (!(bh->b_prev_free) || !(bh->b_next_free)) {
-               printk("VFS: LRU block list corrupted\n");
-               *(int*)0 = 0;
+       struct buffer_head **bhp = &lru_list[bh->b_list];
+
+       if(!*bhp) {
+               *bhp = bh;
+               bh->b_prev_free = bh;
         }
+
+       if (bh->b_next_free)
+               panic("VFS: buffer LRU pointers corrupted");
+
+       bh->b_next_free = *bhp;
+       bh->b_prev_free = (*bhp)->b_prev_free;
+       (*bhp)->b_prev_free->b_next_free = bh;
+       (*bhp)->b_prev_free = bh;
+
+       nr_buffers++;
+       nr_buffers_type[bh->b_list]++;
+}
+
+static inline void remove_from_lru_list(struct buffer_head * bh)
+{
+       if (!(bh->b_prev_free) || !(bh->b_next_free))
+               return;
+
         if (bh->b_dev == B_FREE) {
                 printk("LRU list corrupted");
                 *(int*)0 = 0;
@@ -455,6 +493,9 @@ static inline void remove_from_lru_list(struct buffer_head * bh)
         if (lru_list[bh->b_list] == bh)
                  lru_list[bh->b_list] = NULL;
         bh->b_next_free = bh->b_prev_free = NULL;
+
+       nr_buffers--;
+       nr_buffers_type[bh->b_list]--;
  }
  
  static inline void remove_from_free_list(struct buffer_head * bh)
@@ -479,15 +520,8 @@ static inline void remove_from_free_list(struct buffer_head * bh)
  
  static void remove_from_queues(struct buffer_head * bh)
  {
-       if(bh->b_dev == B_FREE) {
-               remove_from_free_list(bh); /* Free list entries should not be
-                                             in the hash queue */
-               goto out;
-       }
-       nr_buffers_type[bh->b_list]--;
         remove_from_hash_queue(bh);
         remove_from_lru_list(bh);
-out:
  }
  
  static inline void put_last_free(struct buffer_head * bh)
@@ -510,69 +544,6 @@ static inline void put_last_free(struct buffer_head * bh)
         }
  }
  
-static void insert_into_queues(struct buffer_head * bh)
-{
-       /* put at end of free list */
-       if(bh->b_dev == B_FREE) {
-               put_last_free(bh);
-       } else {
-               struct buffer_head **bhp = &lru_list[bh->b_list];
-
-               if(!*bhp) {
-                       *bhp = bh;
-                       bh->b_prev_free = bh;
-               }
-
-               if (bh->b_next_free)
-                       panic("VFS: buffer LRU pointers corrupted");
-
-               bh->b_next_free = *bhp;
-               bh->b_prev_free = (*bhp)->b_prev_free;
-               (*bhp)->b_prev_free->b_next_free = bh;
-               (*bhp)->b_prev_free = bh;
-
-               nr_buffers_type[bh->b_list]++;
-
-               /* Put the buffer in new hash-queue if it has a device. */
-               bh->b_next = NULL;
-               bh->b_pprev = NULL;
-               if (bh->b_dev) {
-                       struct buffer_head **bhp = &hash(bh->b_dev, bh->b_blocknr);
-                       struct buffer_head *next = *bhp;
-
-                       if (next) {
-                               bh->b_next = next;
-                               next->b_pprev = &bh->b_next;
-                       }
-                       *bhp = bh;
-                       bh->b_pprev = bhp;
-               }
-               nr_hashed_buffers++;
-       }
-}
-
-static void insert_into_dirty_queue(struct buffer_head * bh)
-{
-       struct buffer_head **bhp;
-
-
-       bhp = &lru_list[BUF_DIRTY];
-       if(!*bhp) {
-               *bhp = bh;
-               bh->b_prev_free = bh;
-       }
-       if (bh->b_next_free)
-               BUG();
-
-       bh->b_next_free = *bhp;
-       bh->b_prev_free = (*bhp)->b_prev_free;
-       (*bhp)->b_prev_free->b_next_free = bh;
-       (*bhp)->b_prev_free = bh;
-
-       nr_buffers++;
-       nr_buffers_type[BUF_DIRTY]++;
-}
-
  struct buffer_head * find_buffer(kdev_t dev, int block, int size)
  {              
         struct buffer_head * next;
@@ -673,7 +644,7 @@ void set_blocksize(kdev_t dev, int size)
                         }
                         remove_from_queues(bh);
                         bh->b_dev=B_FREE;
-                       insert_into_queues(bh);
+                       put_last_free(bh);
                 }
         }
  }
@@ -693,7 +664,6 @@ static void refill_freelist(int size)
  void init_buffer(struct buffer_head *bh, kdev_t dev, int block,
                  bh_end_io_t *handler, void *dev_id)
  {
-       bh->b_count = 1;
         bh->b_list = BUF_CLEAN;
         bh->b_flushtime = 0;
         bh->b_dev = dev;
@@ -743,8 +713,12 @@ get_free:
          * and that it's unused (b_count=0), unlocked, and clean.
          */
         init_buffer(bh, dev, block, end_buffer_io_sync, NULL);
-       bh->b_state=0;
-       insert_into_queues(bh);
+       bh->b_count = 1;
+       bh->b_state = 0;
+
+       /* Insert the buffer into the regular lists */
+       insert_into_lru_list(bh);
+       insert_into_hash_list(bh);
         goto out;
  
         /*
@@ -781,9 +755,9 @@ void set_writetime(struct buffer_head * buf, int flag)
   */
  static void file_buffer(struct buffer_head *bh, int list)
  {
-       remove_from_queues(bh);
+       remove_from_lru_list(bh);
         bh->b_list = list;
-       insert_into_queues(bh);
+       insert_into_lru_list(bh);
  }
  
  /*
@@ -813,7 +787,7 @@ static inline void balance_dirty (kdev_t dev)
   * A buffer may need to be moved from one buffer list to another
   * (e.g. in case it is not shared any more). Handle this.
   */
-void __refile_buffer(struct buffer_head * buf)
+void refile_buffer(struct buffer_head * buf)
  {
         int dispose;
  
@@ -829,7 +803,7 @@ void __refile_buffer(struct buffer_head * buf)
                 dispose = BUF_CLEAN;
         if(dispose != buf->b_list) {
                 file_buffer(buf, dispose);
-               if(dispose == BUF_DIRTY)
+               if (dispose == BUF_DIRTY)
                         balance_dirty(buf->b_dev);
         }
  }
@@ -1282,89 +1256,143 @@ static int create_page_buffers (int rw, struct page *page, kdev_t dev, int b[],
         }
         tail->b_this_page = head;
         page->buffers = head;
+       get_page(page);
         return 0;
  }
  
  /*
- * Can the buffer be thrown out?
+ * We don't have to release all buffers here, but
+ * we have to be sure that no dirty buffer is left
+ * and no IO is going on (no buffer is locked), because
+ * we have truncated the file and are going to free the
+ * blocks on-disk..
   */
-#define BUFFER_BUSY_BITS       ((1<<BH_Dirty) | (1<<BH_Lock) | (1<<BH_Protected))
-#define buffer_busy(bh)        ((bh)->b_count || ((bh)->b_state & BUFFER_BUSY_BITS))
-
-static int page_idle(struct page *page, int sync)
+int generic_block_flushpage(struct inode *inode, struct page *page, unsigned long offset)
  {
         struct buffer_head *head, *bh, *next;
+       unsigned int curr_off = 0;
+
+
+       if (!PageLocked(page))
+               BUG();
+       if (!page->buffers)
+               return 0;
  
         head = page->buffers;
         bh = head;
         do {
+               unsigned int next_off = curr_off + bh->b_size;
                 next = bh->b_this_page;
  
-               if (bh->b_blocknr) {
-                       if (buffer_locked(bh)) {
-                               wait_on_buffer(bh);
-                               return 0;
-                       }
-                       if (buffer_dirty(bh)) {
-                               if (sync) {
+               /*
+                * is this block fully flushed?
+                */
+               if (offset <= curr_off) {
+                       if (bh->b_blocknr) {
+                               if (buffer_locked(bh))
                                         wait_on_buffer(bh);
-                                       ll_rw_block(WRITE, 1, &bh);
-                                       return 0;
-                               } else
-                                       clear_bit(BH_Dirty, &bh->b_state);
+                               clear_bit(BH_Dirty, &bh->b_state);
+                               if(bh->b_dev == B_FREE)
+                                       BUG();
+                               remove_from_lru_list(bh);
+                               bh->b_blocknr = 0;
                         }
                 }
+               curr_off = next_off;
                 bh = next;
         } while (bh != head);
-       return 1;
+
+       /*
+        * subtle. We release buffer-heads only if this is
+        * the 'final' flushpage. We invalidate the bmap
+        * cached value in all cases.
+        */
+       if (!offset) {
+               buffermem += PAGE_SIZE;
+               try_to_free_buffers(page);
+       }
+
+       return 0;
  }
  
-/*
- * We dont have to release all buffers here, but
- * we have to be sure that no dirty buffer is left
- * and no IO is going on (no buffer is locked), because
- * we are going to free the underlying page.
- */
-int generic_block_flushpage(struct inode *inode, struct page *page, int sync)
+static inline void create_empty_buffers (struct page *page,
+                       struct inode *inode, unsigned long blocksize)
  {
-       struct buffer_head *head, *bh, *next;
+       struct buffer_head *bh, *head, *tail;
  
+       head = create_buffers(page_address(page), blocksize, 1);
+       if (page->buffers)
+               BUG();
+
+       bh = head;
+       do {
+               bh->b_dev = inode->i_dev;
+               tail = bh;
+               bh = bh->b_this_page;
+       } while (bh);
+       tail->b_this_page = head;
+       page->buffers = head;
+}
+
+int block_write_full_page (struct file *file, struct page *page, fs_getblock_t fs_get_block)
+{
+       struct dentry *dentry = file->f_dentry;
+       struct inode *inode = dentry->d_inode;
+       int err, created, i;
+       unsigned long block, phys, offset;
+       struct buffer_head *bh, *head;
  
         if (!PageLocked(page))
                 BUG();
+
         if (!page->buffers)
-               BUG();
+               create_empty_buffers(page, inode, inode->i_sb->s_blocksize);
+       head = page->buffers;
  
-       while (!page_idle(page, sync));
+       offset = page->offset;
+       block = offset >> inode->i_sb->s_blocksize_bits;
+
+       // FIXME: currently we assume page alignment.
+       if (offset & (PAGE_SIZE-1))
+               BUG();
  
-       head = page->buffers;
         bh = head;
+       i = 0;
         do {
-               next = bh->b_this_page;
-               if (bh->b_blocknr) {
-                       if(bh->b_dev == B_FREE) {
-                               remove_from_free_list(bh);
-                       } else {
-                               if (bh->b_list == BUF_DIRTY) {
-                                       nr_buffers--;
-                                       nr_buffers_type[BUF_DIRTY]--;
-                                       remove_from_lru_list(bh);
-                               }
-                       }
+               if (!bh)
+                       BUG();
+
+               if (!bh->b_blocknr) {
+                       err = -EIO;
+                       down(&inode->i_sem);
+                       phys = fs_get_block (inode, block, 1, &err, &created);
+                       up(&inode->i_sem);
+                       if (!phys)
+                               goto out;
+
+                       init_buffer(bh, inode->i_dev, phys, end_buffer_io_sync, NULL);
+                       bh->b_state = (1<<BH_Uptodate);
                 } else {
+                       /*
+                        * block already exists, just mark it dirty:
+                        */
+                       bh->b_end_io = end_buffer_io_sync;
+                       set_bit(BH_Uptodate, &bh->b_state);
                 }
-               bh->b_state = 0;
-               bh->b_count = 0;
-               put_unused_buffer_head(bh);
-               bh = next;
+               mark_buffer_dirty(bh, 0);
+
+               bh = bh->b_this_page;
+               block++;
         } while (bh != head);
-       page->buffers = NULL;
  
+       SetPageUptodate(page);
         return 0;
+out:
+       ClearPageUptodate(page);
+       return err;
  }
  
-
-long block_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf, fs_getblock_t fs_get_block)
+int block_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf, fs_getblock_t fs_get_block)
  {
         struct dentry *dentry = file->f_dentry;
         struct inode *inode = dentry->d_inode;
@@ -1373,7 +1401,7 @@ long block_write_one_page (struct file *file, struct page *page, unsigned long o
         unsigned long blocksize, start_block, end_block;
         unsigned long start_offset, start_bytes, end_bytes;
         unsigned long bbits, phys, blocks, i, len;
-       struct buffer_head *bh;
+       struct buffer_head *bh, *head;
         char * target_buf;
  
         target_buf = (char *)page_address(page) + offset;
@@ -1383,22 +1411,9 @@ long block_write_one_page (struct file *file, struct page *page, unsigned long o
                 BUG();
  
         blocksize = inode->i_sb->s_blocksize;
-       if (!page->buffers) {
-               struct buffer_head *head, *tail;
-
-               head = create_buffers(page_address(page), blocksize, 1);
-               if (page->buffers)
-                       BUG();
-
-               bh = head;
-               do {
-                       bh->b_dev = inode->i_dev;
-                       tail = bh;
-                       bh = bh->b_this_page;
-               } while (bh);
-               tail->b_this_page = head;
-               page->buffers = head;
-       }
+       if (!page->buffers)
+               create_empty_buffers(page, inode, blocksize);
+       head = page->buffers;
  
         bbits = inode->i_sb->s_blocksize_bits;
         block = page->offset >> bbits;
@@ -1425,8 +1440,8 @@ long block_write_one_page (struct file *file, struct page *page, unsigned long o
         if (page->offset & (PAGE_SIZE-1))
                 BUG();
  
-       bh = page->buffers;
         i = 0;
+       bh = head;
         do {
                 if (!bh)
                         BUG();
@@ -1434,9 +1449,38 @@ long block_write_one_page (struct file *file, struct page *page, unsigned long o
                 if ((i < start_block) || (i > end_block)) {
                         goto skip;
                 }
+               unlock_kernel();
+
+               err = -EFAULT;
+               if (start_offset) {
+                       len = start_bytes;
+                       start_offset = 0;
+               } else
+               if (end_bytes && (i == end_block)) {
+                       len = end_bytes;
+                       end_bytes = 0;
+               } else {
+                       /*
+                        * Overwritten block.
+                        */
+                       len = blocksize;
+               }
+               if (copy_from_user(target_buf, buf, len))
+                       goto out_nolock;
+               target_buf += len;
+               buf += len;
+
+               /*
+                * we dirty buffers only after copying the data into
+                * the page - this way we can dirty the buffer even if
+                * the bh is still doing IO.
+                */
+               lock_kernel();
                 if (!bh->b_blocknr) {
-                       phys = fs_get_block (inode, block, 1, &err, &created);
                         err = -EIO;
+                       down(&inode->i_sem);
+                       phys = fs_get_block (inode, block, 1, &err, &created);
+                       up(&inode->i_sem);
                         if (!phys)
                                 goto out;
  
@@ -1458,48 +1502,20 @@ long block_write_one_page (struct file *file, struct page *page, unsigned long o
                         lock_kernel();
  
                         init_buffer(bh, inode->i_dev, phys, end_buffer_io_sync, NULL);
-                       bh->b_state = (1<<BH_Dirty) | (1<<BH_Uptodate);
-                       bh->b_list = BUF_DIRTY;
-                       insert_into_dirty_queue(bh);
+                       bh->b_state = (1<<BH_Uptodate);
                 } else {
                         /*
                          * block already exists, just mark it dirty:
                          */
                         bh->b_end_io = end_buffer_io_sync;
-                       set_bit(BH_Dirty, &bh->b_state);
                         set_bit(BH_Uptodate, &bh->b_state);
                 }
-               unlock_kernel();
-
-               err = -EFAULT;
-               if (start_offset) {
-                       len = start_bytes;
-                       start_offset = 0;
-               } else
-               if (end_bytes && (i == end_block)) {
-                       len = end_bytes;
-                       end_bytes = 0;
-               } else {
-                       /*
-                        * Overwritten block.
-                        */
-                       len = blocksize;
-               }
-               if (copy_from_user(target_buf, buf, len))
-                       goto out_nolock;
-               target_buf += len;
-               buf += len;
-
-               lock_kernel();
-               if (bh->b_list != BUF_DIRTY) {
-                       bh->b_list = BUF_DIRTY;
-                       insert_into_dirty_queue(bh);
-               }
+               mark_buffer_dirty(bh, 0);
  skip:
                 i++;
                 block++;
                 bh = bh->b_this_page;
-       } while (i < blocks);
+       } while (bh != head);
         unlock_kernel();
  
         SetPageUptodate(page);
@@ -1545,7 +1561,7 @@ int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size, int bmap)
         do {
                 block = *(b++);
  
-               if (fresh && (bh->b_count != 1))
+               if (fresh && (bh->b_count != 0))
                         BUG();
                 if (rw == READ) {
                         if (!fresh)
@@ -1569,12 +1585,8 @@ int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size, int bmap)
                                 if (!block)
                                         BUG();
                         }
-                       set_bit(BH_Dirty, &bh->b_state);
                         set_bit(BH_Uptodate, &bh->b_state);
-                       if (bh->b_list != BUF_DIRTY) {
-                               bh->b_list = BUF_DIRTY;
-                               insert_into_dirty_queue(bh);
-                       }
+                       mark_buffer_dirty(bh, 0);
                         arr[nr++] = bh;
                 }
                 bh = bh->b_this_page;
@@ -1701,7 +1713,6 @@ static int grow_buffers(int size)
                         tmp->b_next_free = tmp;
                 }
                 insert_point = tmp;
-               nr_buffers++;
                 if (tmp->b_this_page)
                         tmp = tmp->b_this_page;
                 else
@@ -1714,6 +1725,12 @@ static int grow_buffers(int size)
         return 1;
  }
  
+/*
+ * Can the buffer be thrown out?
+ */
+#define BUFFER_BUSY_BITS       ((1<<BH_Dirty) | (1<<BH_Lock) | (1<<BH_Protected))
+#define buffer_busy(bh)        ((bh)->b_count || ((bh)->b_state & BUFFER_BUSY_BITS))
+
  /*
   * try_to_free_buffers() checks if all the buffers on this particular page
   * are unused, and free's the page if so.
@@ -1732,6 +1749,13 @@ int try_to_free_buffers(struct page * page)
                 tmp = tmp->b_this_page;
                 if (!buffer_busy(p))
                         continue;
+{
+       static int count = 30;
+       if (count) {
+               count--;
+               printk("bh %p (%04x:%ld): count=%d, state=0x%04x\n", p, p->b_dev, p->b_blocknr, p->b_count, p->b_state);
+       }
+}
  
                 wakeup_bdflush(0);
                 return 0;
@@ -1741,7 +1765,6 @@ int try_to_free_buffers(struct page * page)
         do {
                 struct buffer_head * p = tmp;
                 tmp = tmp->b_this_page;
-               nr_buffers--;
                 remove_from_queues(p);
                 put_unused_buffer_head(p);
         } while (tmp != bh);
diff --git a/fs/ext2/file.c b/fs/ext2/file.c

index dd4c5b38a352c2f00a59880962950801e3d36457..806859ba0be272643319a41837a39acbd02570d4 100644 (file)
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -108,34 +108,7 @@ static inline void remove_suid(struct inode *inode)
  
  static int ext2_writepage (struct file * file, struct page * page)
  {
-       struct dentry *dentry = file->f_dentry;
-       struct inode *inode = dentry->d_inode;
-       unsigned long block;
-       int *p, nr[PAGE_SIZE/512];
-       int i, err, created;
-       struct buffer_head *bh;
-
-       i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
-       block = page->offset >> inode->i_sb->s_blocksize_bits;
-       p = nr;
-       bh = page->buffers;
-       do {
-               if (bh && bh->b_blocknr)
-                       *p = bh->b_blocknr;
-               else
-                       *p = ext2_getblk_block (inode, block, 1, &err, &created);
-               if (!*p)
-                       return -EIO;
-               i--;
-               block++;
-               p++;
-               if (bh)
-                       bh = bh->b_this_page;
-       } while (i > 0);
-
-       /* IO start */
-       brw_page(WRITE, page, inode->i_dev, nr, inode->i_sb->s_blocksize, 1);
-       return 0;
+       return block_write_full_page(file, page, ext2_getblk_block);
  }
  
  static long ext2_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf)
diff --git a/fs/inode.c b/fs/inode.c

index ee8602939f1affa3d7e6500ead9071ac8d980f85..8b268dd41792af8b38cdf0aabd0dafd2be78aee6 100644 (file)
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -130,7 +130,6 @@ static inline void init_once(struct inode * inode)
         INIT_LIST_HEAD(&inode->i_hash);
         INIT_LIST_HEAD(&inode->i_dentry);
         sema_init(&inode->i_sem, 1);
-       sema_init(&inode->i_atomic_write, 1);
  }
  
  static inline void write_inode(struct inode *inode)
@@ -767,9 +766,6 @@ kdevname(inode->i_dev), inode->i_ino, inode->i_count);
  if (atomic_read(&inode->i_sem.count) != 1)
  printk(KERN_ERR "iput: Aieee, semaphore in use inode %s/%ld, count=%d\n",
  kdevname(inode->i_dev), inode->i_ino, atomic_read(&inode->i_sem.count));
-if (atomic_read(&inode->i_atomic_write.count) != 1)
-printk(KERN_ERR "iput: Aieee, atomic write semaphore in use inode %s/%ld, count=%d\n",
-kdevname(inode->i_dev), inode->i_ino, atomic_read(&inode->i_sem.count));
  #endif
                 }
                 if (inode->i_count > (1<<31)) {
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c

index 6e8930c70691df5619a67b11fc0e14f8520abd98..8c396f3e63a0c5ee0b513d4e6599788c7f2fa6a0 100644 (file)
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -112,14 +112,6 @@ repeat:
         if (j < sb->u.minix_sb.s_firstdatazone ||
             j >= sb->u.minix_sb.s_nzones)
                 return 0;
-       if (!(bh = getblk(sb->s_dev,j,BLOCK_SIZE))) {
-               printk("new_block: cannot get block");
-               return 0;
-       }
-       memset(bh->b_data, 0, BLOCK_SIZE);
-       mark_buffer_uptodate(bh, 1);
-       mark_buffer_dirty(bh, 1);
-       brelse(bh);
         return j;
  }
  
diff --git a/fs/minix/file.c b/fs/minix/file.c

index f6ddda02140c22fc74ae7e6622d78c4699895bf7..55ed5fd5d93bb42de1cb8ad6171e95c4d8aefc06 100644 (file)
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -27,7 +27,51 @@
  #include <linux/fs.h>
  #include <linux/minix_fs.h>
  
-static ssize_t minix_file_write(struct file *, const char *, size_t, loff_t *);
+static int minix_writepage(struct file *file, struct page *page)
+{
+       struct dentry *dentry = file->f_dentry;
+       struct inode *inode = dentry->d_inode;
+       unsigned long block;
+       int *p, nr[PAGE_SIZE/BLOCK_SIZE];
+       int i, err, created;
+       struct buffer_head *bh;
+
+       i = PAGE_SIZE / BLOCK_SIZE;
+       block = page->offset / BLOCK_SIZE;
+       p = nr;
+       bh = page->buffers;
+       do {
+               if (bh && bh->b_blocknr)
+                       *p = bh->b_blocknr;
+               else
+                       *p = minix_getblk_block(inode, block, 1, &err, &created);
+               if (!*p)
+                       return -EIO;
+               i--;
+               block++;
+               p++;
+               if (bh)
+                       bh = bh->b_this_page;
+       } while(i > 0);
+
+       /* IO start */
+       brw_page(WRITE, page, inode->i_dev, nr, BLOCK_SIZE, 1);
+       return 0;
+}
+
+static long minix_write_one_page(struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char *buf)
+{
+       return block_write_one_page(file, page, offset, bytes, buf, minix_getblk_block);
+}
+
+/*
+ * Write to a file (through the page cache).
+ */
+static ssize_t
+minix_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
+{
+       return generic_file_write(file, buf, count, ppos, minix_write_one_page);
+}
  
  /*
   * We have mostly NULLs here: the current defaults are OK for
@@ -61,74 +105,12 @@ struct inode_operations minix_file_inode_operations = {
         NULL,                   /* readlink */
         NULL,                   /* follow_link */
         generic_readpage,       /* readpage */
-       NULL,                   /* writepage */
+       minix_writepage,        /* writepage */
         minix_bmap,             /* bmap */
         minix_truncate,         /* truncate */
-       NULL                    /* permission */
+       NULL,                   /* permission */
+       NULL,                   /* smap */
+       NULL,                   /* updatepage */
+       NULL,                   /* revalidate */
+       generic_block_flushpage,/* flushpage */
  };
-
-static ssize_t minix_file_write(struct file * filp, const char * buf,
-                               size_t count, loff_t *ppos)
-{
-       struct inode * inode = filp->f_dentry->d_inode;
-       off_t pos;
-       ssize_t written, c;
-       struct buffer_head * bh;
-       char * p;
-
-       if (!inode) {
-               printk("minix_file_write: inode = NULL\n");
-               return -EINVAL;
-       }
-       if (!S_ISREG(inode->i_mode)) {
-               printk("minix_file_write: mode = %07o\n",inode->i_mode);
-               return -EINVAL;
-       }
-       if (filp->f_flags & O_APPEND)
-               pos = inode->i_size;
-       else
-               pos = *ppos;
-       written = 0;
-       while (written < count) {
-               bh = minix_getblk(inode,pos/BLOCK_SIZE,1);
-               if (!bh) {
-                       if (!written)
-                               written = -ENOSPC;
-                       break;
-               }
-               c = BLOCK_SIZE - (pos % BLOCK_SIZE);
-               if (c > count-written)
-                       c = count-written;
-               if (c != BLOCK_SIZE && !buffer_uptodate(bh)) {
-                       ll_rw_block(READ, 1, &bh);
-                       wait_on_buffer(bh);
-                       if (!buffer_uptodate(bh)) {
-                               brelse(bh);
-                               if (!written)
-                                       written = -EIO;
-                               break;
-                       }
-               }
-               p = (pos % BLOCK_SIZE) + bh->b_data;
-               c -= copy_from_user(p,buf,c);
-               if (!c) {
-                       brelse(bh);
-                       if (!written)
-                               written = -EFAULT;
-                       break;
-               }
-               update_vm_cache(inode, pos, p, c);
-               mark_buffer_uptodate(bh, 1);
-               mark_buffer_dirty(bh, 0);
-               brelse(bh);
-               pos += c;
-               written += c;
-               buf += c;
-       }
-       if (pos > inode->i_size)
-               inode->i_size = pos;
-       inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-       *ppos = pos;
-       mark_inode_dirty(inode);
-       return written;
-}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c

index 5a29c53e0a83aa6330ac71a2aba8a51b47f0857f..088de42dcbd21594076316d8b890ce1456bcf528 100644 (file)
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -407,7 +407,7 @@ static int V2_block_bmap(struct buffer_head * bh, int nr)
         return tmp;
  }
  
-static int V2_minix_bmap(struct inode * inode,int block)
+static int V2_minix_bmap(struct inode * inode, int block)
  {
         int i;
  
@@ -454,7 +454,7 @@ static int V2_minix_bmap(struct inode * inode,int block)
  /*
   * The global minix fs bmap function.
   */
-int minix_bmap(struct inode * inode,int block)
+int minix_bmap(struct inode * inode, int block)
  {
         if (INODE_VERSION(inode) == MINIX_V1)
                 return V1_minix_bmap(inode, block);
@@ -465,8 +465,8 @@ int minix_bmap(struct inode * inode,int block)
  /*
   * The minix V1 fs getblk functions.
   */
-static struct buffer_head * V1_inode_getblk(struct inode * inode, int nr,
-                                           int create)
+static struct buffer_head * V1_inode_getblk(struct inode * inode, int nr, int create,
+                                           int metadata, int *phys_block, int *created)
  {
         int tmp;
         unsigned short *p;
@@ -476,31 +476,51 @@ static struct buffer_head * V1_inode_getblk(struct inode * inode, int nr,
  repeat:
         tmp = *p;
         if (tmp) {
-               result = getblk(inode->i_dev, tmp, BLOCK_SIZE);
-               if (tmp == *p)
-                       return result;
-               brelse(result);
-               goto repeat;
+               if (metadata) {
+                       result = getblk(inode->i_dev, tmp, BLOCK_SIZE);
+                       if (tmp == *p)
+                               return result;
+                       brelse(result);
+                       goto repeat;
+               } else {
+                       *phys_block = tmp;
+                       return NULL;
+               }
         }
         if (!create)
                 return NULL;
         tmp = minix_new_block(inode->i_sb);
         if (!tmp)
                 return NULL;
-       result = getblk(inode->i_dev, tmp, BLOCK_SIZE);
-       if (*p) {
-               minix_free_block(inode->i_sb,tmp);
-               brelse(result);
-               goto repeat;
+       if (metadata) {
+               result = getblk(inode->i_dev, tmp, BLOCK_SIZE);
+               if (*p) {
+                       minix_free_block(inode->i_sb, tmp);
+                       brelse(result);
+                       goto repeat;
+               }
+               memset(result->b_data, 0, BLOCK_SIZE);
+               mark_buffer_uptodate(result, 1);
+               mark_buffer_dirty(result, 1);
+       } else {
+               if (*p) {
+                       minix_free_block(inode->i_sb, tmp);
+                       goto repeat;
+               }
+               *phys_block = tmp;
+               result = NULL;
+               *created = 1;
         }
         *p = tmp;
+
         inode->i_ctime = CURRENT_TIME;
         mark_inode_dirty(inode);
         return result;
  }
  
  static struct buffer_head * V1_block_getblk(struct inode * inode,
-       struct buffer_head * bh, int nr, int create)
+       struct buffer_head * bh, int nr, int create,
+       int metadata, int *phys_block, int *created)
  {
         int tmp;
         unsigned short *p;
@@ -520,13 +540,19 @@ static struct buffer_head * V1_block_getblk(struct inode * inode,
  repeat:
         tmp = *p;
         if (tmp) {
-               result = getblk(bh->b_dev, tmp, BLOCK_SIZE);
-               if (tmp == *p) {
+               if (metadata) {
+                       result = getblk(bh->b_dev, tmp, BLOCK_SIZE);
+                       if (tmp == *p) {
+                               brelse(bh);
+                               return result;
+                       }
+                       brelse(result);
+                       goto repeat;
+               } else {
+                       *phys_block = tmp;
                         brelse(bh);
-                       return result;
+                       return NULL;
                 }
-               brelse(result);
-               goto repeat;
         }
         if (!create) {
                 brelse(bh);
@@ -537,49 +563,74 @@ repeat:
                 brelse(bh);
                 return NULL;
         }
-       result = getblk(bh->b_dev, tmp, BLOCK_SIZE);
-       if (*p) {
-               minix_free_block(inode->i_sb,tmp);
-               brelse(result);
-               goto repeat;
+       if (metadata) {
+               result = getblk(bh->b_dev, tmp, BLOCK_SIZE);
+               if (*p) {
+                       minix_free_block(inode->i_sb, tmp);
+                       brelse(result);
+                       goto repeat;
+               }
+               memset(result->b_data, 0, BLOCK_SIZE);
+               mark_buffer_uptodate(result, 1);
+               mark_buffer_dirty(result, 1);
+       } else {
+               if (*p) {
+                       minix_free_block(inode->i_sb, tmp);
+                       goto repeat;
+               }
+               *phys_block = tmp;
+               result = NULL;
+               *created = 1;
         }
+
         *p = tmp;
         mark_buffer_dirty(bh, 1);
         brelse(bh);
         return result;
  }
  
-static struct buffer_head * V1_minix_getblk(struct inode * inode, int block,
-                                           int create)
+int V1_getblk_block(struct inode * inode, long block, int create, int *err, int *created)
  {
-       struct buffer_head * bh;
+       struct buffer_head *bh, *tmp;
+       int phys_block = 0;     /* stays 0 ("no block") when a helper returns NULL early without storing into it */
  
-       if (block<0) {
+       *err = -EIO;    /* error reported when the block number is rejected below */
+       if (block < 0) {
                 printk("minix_getblk: block<0");
-               return NULL;
+               return 0;
         }
         if (block >= inode->i_sb->u.minix_sb.s_max_size/BLOCK_SIZE) {
                 printk("minix_getblk: block>big");
-               return NULL;
+               return 0;
+       }
+       *created = 0;
+       if (block < 7) {
+               tmp = V1_inode_getblk(inode, block, create,
+                                     0, &phys_block, created);
+               goto out;
         }
-       if (block < 7)
-               return V1_inode_getblk(inode,block,create);
         block -= 7;
         if (block < 512) {
-               bh = V1_inode_getblk(inode,7,create);
-               return V1_block_getblk(inode, bh, block, create);
+               bh = V1_inode_getblk(inode, 7, create, 1, NULL, NULL);
+               tmp = V1_block_getblk(inode, bh, block, create,
+                                     0, &phys_block, created);
+               goto out;
         }
         block -= 512;
-       bh = V1_inode_getblk(inode,8,create);
-       bh = V1_block_getblk(inode, bh, (block>>9) & 511, create);
-       return V1_block_getblk(inode, bh, block & 511, create);
+       bh = V1_inode_getblk(inode, 8, create, 1, NULL, NULL);
+       bh = V1_block_getblk(inode, bh, (block>>9) & 511, create, 1, NULL, NULL);
+       tmp = V1_block_getblk(inode, bh, block & 511, create, 0, &phys_block, created);
+
+out:
+       *err = 0;       /* set on every path that reaches "out", whether or not a block was found */
+       return phys_block;
  }
  
  /*
   * The minix V2 fs getblk functions.
   */
-static struct buffer_head * V2_inode_getblk(struct inode * inode, int nr,
-                                           int create)
+static struct buffer_head * V2_inode_getblk(struct inode * inode, int nr, int create,
+                                           int metadata, int *phys_block, int *created)
  {
         int tmp;
         unsigned long *p;
@@ -589,31 +640,51 @@ static struct buffer_head * V2_inode_getblk(struct inode * inode, int nr,
  repeat:
         tmp = *p;
         if (tmp) {
-               result = getblk(inode->i_dev, tmp, BLOCK_SIZE);
-               if (tmp == *p)
-                       return result;
-               brelse(result);
-               goto repeat;
+               if (metadata) {
+                       result = getblk(inode->i_dev, tmp, BLOCK_SIZE);
+                       if (tmp == *p)
+                               return result;
+                       brelse(result);
+                       goto repeat;
+               } else {
+                       *phys_block = tmp;
+                       return NULL;
+               }
         }
         if (!create)
                 return NULL;
         tmp = minix_new_block(inode->i_sb);
         if (!tmp)
                 return NULL;
-       result = getblk(inode->i_dev, tmp, BLOCK_SIZE);
-       if (*p) {
-               minix_free_block(inode->i_sb,tmp);
-               brelse(result);
-               goto repeat;
+       if (metadata) {
+               result = getblk(inode->i_dev, tmp, BLOCK_SIZE);
+               if (*p) {
+                       minix_free_block(inode->i_sb, tmp);
+                       brelse(result);
+                       goto repeat;
+               }
+               memset(result->b_data, 0, BLOCK_SIZE);
+               mark_buffer_uptodate(result, 1);
+               mark_buffer_dirty(result, 1);
+       } else {
+               if (*p) {
+                       minix_free_block(inode->i_sb, tmp);
+                       goto repeat;
+               }
+               *phys_block = tmp;
+               result = NULL;
+               *created = 1;
         }
         *p = tmp;
+
         inode->i_ctime = CURRENT_TIME;
         mark_inode_dirty(inode);
         return result;
  }
  
  static struct buffer_head * V2_block_getblk(struct inode * inode,
-       struct buffer_head * bh, int nr, int create)
+       struct buffer_head * bh, int nr, int create,
+       int metadata, int *phys_block, int *created)
  {
         int tmp;
         unsigned long *p;
@@ -633,13 +704,19 @@ static struct buffer_head * V2_block_getblk(struct inode * inode,
  repeat:
         tmp = *p;
         if (tmp) {
-               result = getblk(bh->b_dev, tmp, BLOCK_SIZE);
-               if (tmp == *p) {
+               if (metadata) {
+                       result = getblk(bh->b_dev, tmp, BLOCK_SIZE);
+                       if (tmp == *p) {
+                               brelse(bh);
+                               return result;
+                       }
+                       brelse(result);
+                       goto repeat;
+               } else {
+                       *phys_block = tmp;
                         brelse(bh);
-                       return result;
+                       return NULL;
                 }
-               brelse(result);
-               goto repeat;
         }
         if (!create) {
                 brelse(bh);
@@ -650,60 +727,107 @@ repeat:
                 brelse(bh);
                 return NULL;
         }
-       result = getblk(bh->b_dev, tmp, BLOCK_SIZE);
-       if (*p) {
-               minix_free_block(inode->i_sb,tmp);
-               brelse(result);
-               goto repeat;
+       if (metadata) {
+               result = getblk(bh->b_dev, tmp, BLOCK_SIZE);
+               if (*p) {
+                       minix_free_block(inode->i_sb, tmp);
+                       brelse(result);
+                       goto repeat;
+               }
+               memset(result->b_data, 0, BLOCK_SIZE);
+               mark_buffer_uptodate(result, 1);
+               mark_buffer_dirty(result, 1);
+       } else {
+               if (*p) {
+                       minix_free_block(inode->i_sb, tmp);
+                       goto repeat;
+               }
+               *phys_block = tmp;
+               result = NULL;
+               *created = 1;
         }
+
         *p = tmp;
         mark_buffer_dirty(bh, 1);
         brelse(bh);
         return result;
  }
  
-static struct buffer_head * V2_minix_getblk(struct inode * inode, int block,
-                                           int create)
+int V2_getblk_block(struct inode * inode, int block, int create, int *err, int *created)
  {
-       struct buffer_head * bh;
+       struct buffer_head * bh, *tmp;
+       int phys_block = 0;     /* stays 0 ("no block") when a helper returns NULL early without storing into it */
  
-       if (block<0) {
+       *err = -EIO;    /* error reported when the block number is rejected below */
+       if (block < 0) {
                 printk("minix_getblk: block<0");
-               return NULL;
+               return 0;
         }
         if (block >= inode->i_sb->u.minix_sb.s_max_size/BLOCK_SIZE) {
                 printk("minix_getblk: block>big");
-               return NULL;
+               return 0;
+       }
+       *created = 0;
+       if (block < 7) {
+               tmp = V2_inode_getblk(inode, block, create,
+                                     0, &phys_block, created);
+               goto out;
         }
-       if (block < 7)
-               return V2_inode_getblk(inode,block,create);
         block -= 7;
         if (block < 256) {
-               bh = V2_inode_getblk(inode,7,create);
-               return V2_block_getblk(inode, bh, block, create);
+               bh = V2_inode_getblk(inode, 7, create, 1, NULL, NULL);
+               tmp = V2_block_getblk(inode, bh, block, create,
+                                     0, &phys_block, created);
+               goto out;
         }
         block -= 256;
         if (block < 256*256) {
-               bh = V2_inode_getblk(inode,8,create);
-               bh = V2_block_getblk(inode, bh, (block>>8) & 255, create);
-               return V2_block_getblk(inode, bh, block & 255, create);
+               bh = V2_inode_getblk(inode, 8, create, 1, NULL, NULL);
+               bh = V2_block_getblk(inode, bh, (block>>8) & 255, create,
+                                    1, NULL, NULL);
+               tmp = V2_block_getblk(inode, bh, block & 255, create,
+                                     0, &phys_block, created);
+               goto out;
         }
         block -= 256*256;
-       bh = V2_inode_getblk(inode,9,create);
-       bh = V2_block_getblk(inode, bh, (block >> 16) & 255, create);
-       bh = V2_block_getblk(inode, bh, (block >> 8) & 255, create);
-       return V2_block_getblk(inode, bh, block & 255, create);
+       bh = V2_inode_getblk(inode, 9, create, 1, NULL, NULL);
+       bh = V2_block_getblk(inode, bh, (block >> 16) & 255, create, 1, NULL, NULL);
+       bh = V2_block_getblk(inode, bh, (block >> 8) & 255, create, 1, NULL, NULL);
+       tmp = V2_block_getblk(inode, bh, block & 255, create, 0, &phys_block, created);
+
+out:
+       *err = 0;       /* set on every path that reaches "out", whether or not a block was found */
+       return phys_block;
+}
+
+int minix_getblk_block (struct inode *inode, long block,
+                       int create, int *err, int *created)
+{
+       if (INODE_VERSION(inode) == MINIX_V1)   /* choose the V1 or V2 mapper by inode version */
+               return V1_getblk_block(inode, block, create, err, created);
+       else
+               return V2_getblk_block(inode, block, create, err, created);     /* NOTE(review): V2_getblk_block declares "int block", so the long is narrowed here */
  }
  
  /*
   * the global minix fs getblk function.
   */
-struct buffer_head * minix_getblk(struct inode * inode, int block, int create)
+struct buffer_head *minix_getblk (struct inode *inode, int block, int create)
  {
-       if (INODE_VERSION(inode) == MINIX_V1)
-               return V1_minix_getblk(inode,block,create);
-       else
-               return V2_minix_getblk(inode,block,create);
+       struct buffer_head *tmp = NULL;
+       int phys_block;
+       int err, created;
+
+       phys_block = minix_getblk_block(inode, block, create, &err, &created);
+       if (phys_block) {
+               tmp = getblk(inode->i_dev, phys_block, BLOCK_SIZE);
+               if (created) {
+                       memset(tmp->b_data, 0, BLOCK_SIZE);
+                       mark_buffer_uptodate(tmp, 1);
+                       mark_buffer_dirty(tmp, 1);
+               }
+       }
+       return tmp;
  }
  
  struct buffer_head * minix_bread(struct inode * inode, int block, int create)
diff --git a/fs/minix/truncate.c b/fs/minix/truncate.c

index a94806fdf2f597f3cd277757fdddf5bdf5145c6f..4718e092e48ada1438e8bb6472eb66f09653eb93 100644 (file)
--- a/fs/minix/truncate.c
+++ b/fs/minix/truncate.c
@@ -32,6 +32,9 @@
   * general case (size = XXX). I hope.
   */
  
+#define DATA_BUFFER_USED(bh) \
+       (((bh)->b_count > 1) || buffer_locked(bh))      /* true if b_count > 1 or the buffer is locked; the truncate loops retry in that case */
+
  /*
   * The functions for minix V1 fs truncation.
   */
@@ -52,7 +55,7 @@ repeat:
                         brelse(bh);
                         goto repeat;
                 }
-               if ((bh && bh->b_count != 1) || tmp != *p) {
+               if ((bh && DATA_BUFFER_USED(bh)) || tmp != *p) {
                         retry = 1;
                         brelse(bh);
                         continue;
@@ -103,7 +106,7 @@ repeat:
                         brelse(bh);
                         goto repeat;
                 }
-               if ((bh && bh->b_count != 1) || tmp != *ind) {
+               if ((bh && DATA_BUFFER_USED(bh)) || tmp != *ind) {
                         retry = 1;
                         brelse(bh);
                         continue;
@@ -216,7 +219,7 @@ repeat:
                         brelse(bh);
                         goto repeat;
                 }
-               if ((bh && bh->b_count != 1) || tmp != *p) {
+               if ((bh && DATA_BUFFER_USED(bh)) || tmp != *p) {
                         retry = 1;
                         brelse(bh);
                         continue;
@@ -267,7 +270,7 @@ repeat:
                         brelse(bh);
                         goto repeat;
                 }
-               if ((bh && bh->b_count != 1) || tmp != *ind) {
+               if ((bh && DATA_BUFFER_USED(bh)) || tmp != *ind) {
                         retry = 1;
                         brelse(bh);
                         continue;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c

index f137542feb54d0e2143ca24b9807dea86d8a5c0a..c64a0222967230c0035f029064451e074eb9e8bb 100644 (file)
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -118,6 +118,61 @@ struct nfs_cookie_table {
  };
  static kmem_cache_t *nfs_cookie_cachep;
  
+/* This whole scheme relies on the fact that dirent cookies
+ * are monotonically increasing.
+ *
+ * Another invariant is that once we have a valid non-zero
+ * EOF marker cached, we also have the complete set of cookie
+ * table entries.
+ *
+ * We return the page offset associated with the page where
+ * cookie must be if it exists at all, however if we can not
+ * figure that out conclusively, we return < 0.
+ */
+static long __nfs_readdir_offset(struct inode *inode, __u32 cookie)
+{
+       struct nfs_cookie_table *p;
+       unsigned long ret = 0;
+
+       for(p = NFS_COOKIES(inode); p != NULL; p = p->next) {
+               int i;
+
+               for (i = 0; i < COOKIES_PER_CHUNK; i++) {
+                       __u32 this_cookie = p->cookies[i];
+
+                       /* End of known cookies, EOF is our only hope. */
+                       if (!this_cookie)
+                               goto check_eof;
+
+                       /* Next cookie is larger, must be in previous page. */
+                       if (this_cookie > cookie)
+                               return ret;
+
+                       ret += 1;
+
+                       /* Exact cookie match, it must be in this page :-) */
+                       if (this_cookie == cookie)
+                               return ret;
+               }
+       }
+check_eof:
+       if (NFS_DIREOF(inode) != 0)
+               return ret;
+
+       return -1L;
+}
+
+static __inline__ long nfs_readdir_offset(struct inode *inode, __u32 cookie)
+{
+       /* Cookie zero is always at page offset zero.   Optimize the
+        * other common case since most directories fit entirely
+        * in one page.
+        */
+       if (!cookie || (!NFS_COOKIES(inode) && NFS_DIREOF(inode)))
+               return 0;
+       return __nfs_readdir_offset(inode, cookie);
+}
+
  /* Since a cookie of zero is declared special by the NFS
   * protocol, we easily can tell if a cookie in an existing
   * table chunk is valid or not.
@@ -148,38 +203,7 @@ static __inline__ __u32 *find_cookie(struct inode *inode, unsigned long off)
         return ret;
  }
  
-/* Now we cache directories properly, by stuffing the dirent
- * data directly in the page cache.
- *
- * Inode invalidation due to refresh etc. takes care of
- * _everything_, no sloppy entry flushing logic, no extraneous
- * copying, network direct to page cache, the way it was meant
- * to be.
- *
- * NOTE: Dirent information verification is done always by the
- *      page-in of the RPC reply, nowhere else, this simplies
- *      things substantially.
- */
  #define NFS_NAMELEN_ALIGN(__len) ((((__len)+3)>>2)<<2)
-static u32 find_midpoint(__u32 *p, u32 doff)
-{
-       u32 walk = doff & PAGE_MASK;
-
-       while(*p++ != 0) {
-               __u32 skip;
-
-               p++; /* skip fileid */
-
-               /* Skip len, name, and cookie. */
-               skip = NFS_NAMELEN_ALIGN(*p++);
-               p += (skip >> 2) + 1;
-               walk += skip + (4 * sizeof(__u32));
-               if (walk >= doff)
-                       break;
-       }
-       return walk;
-}
-
  static int create_cookie(__u32 cookie, unsigned long off, struct inode *inode)
  {
         struct nfs_cookie_table **cpp;
@@ -211,28 +235,37 @@ static int create_cookie(__u32 cookie, unsigned long off, struct inode *inode)
         return 0;
  }
  
-static struct page *try_to_get_dirent_page(struct file *, unsigned long, int);
+static struct page *try_to_get_dirent_page(struct file *, __u32, int);
  
  /* Recover from a revalidation flush.  The case here is that
   * the inode for the directory got invalidated somehow, and
   * all of our cached information is lost.  In order to get
   * a correct cookie for the current readdir request from the
   * user, we must (re-)fetch older readdir page cache entries.
+ *
+ * Returns < 0 if some error occurs, else it is the page offset
+ * to fetch.
   */
-static int refetch_to_readdir_off(struct file *file, struct inode *inode, u32 off)
+static long refetch_to_readdir_cookie(struct file *file, struct inode *inode)
  {
         struct page *page;
-       u32 cur_off, goal_off = off & PAGE_MASK;
+       u32 goal_cookie = file->f_pos;
+       long cur_off, ret = -1L;
  
  again:
         cur_off = 0;
-       while (cur_off < goal_off) {
+       for (;;) {
                 page = find_get_page(inode, cur_off);
                 if (page) {
                         if (!Page_Uptodate(page))
                                 goto out_error;
                 } else {
-                       page = try_to_get_dirent_page(file, cur_off, 0);
+                       __u32 *cp = find_cookie(inode, cur_off);
+
+                       if (!cp)
+                               goto out_error;
+
+                       page = try_to_get_dirent_page(file, *cp, 0);
                         if (!page) {
                                 if (!cur_off)
                                         goto out_error;
@@ -243,17 +276,33 @@ again:
                 }
                 page_cache_release(page);
  
-               cur_off += PAGE_SIZE;
+               if ((ret = nfs_readdir_offset(inode, goal_cookie)) >= 0)
+                       goto out;
+
+               cur_off += 1;
         }
-       return 0;
+out:
+       return ret;
  
  out_error:
         if (page)
                 page_cache_release(page);
-       return -1;
+       goto out;
  }
  
-static struct page *try_to_get_dirent_page(struct file *file, unsigned long offset, int refetch_ok)
+/* Now we cache directories properly, by stuffing the dirent
+ * data directly in the page cache.
+ *
+ * Inode invalidation due to refresh etc. takes care of
+ * _everything_, no sloppy entry flushing logic, no extraneous
+ * copying, network direct to page cache, the way it was meant
+ * to be.
+ *
+ * NOTE: Dirent information verification is done always by the
+ *      page-in of the RPC reply, nowhere else, this simplifies
+ *      things substantially.
+ */
+static struct page *try_to_get_dirent_page(struct file *file, __u32 cookie, int refetch_ok)
  {
         struct nfs_readdirargs rd_args;
         struct nfs_readdirres rd_res;
@@ -261,6 +310,7 @@ static struct page *try_to_get_dirent_page(struct file *file, unsigned long offs
         struct inode *inode = dentry->d_inode;
         struct page *page, **hash;
         unsigned long page_cache;
+       long offset;
         __u32 *cookiep;
  
         page = NULL;
@@ -268,10 +318,19 @@ static struct page *try_to_get_dirent_page(struct file *file, unsigned long offs
         if (!page_cache)
                 goto out;
  
-       while ((cookiep = find_cookie(inode, offset)) == NULL) {
+       if ((offset = nfs_readdir_offset(inode, cookie)) < 0) {
                 if (!refetch_ok ||
-                   refetch_to_readdir_off(file, inode, file->f_pos))
+                   (offset = refetch_to_readdir_cookie(file, inode)) < 0) {
+                       page_cache_free(page_cache);
                         goto out;
+               }
+       }
+
+       cookiep = find_cookie(inode, offset);
+       if (!cookiep) {
+               /* Gross fatal error. */
+               page_cache_free(page_cache);
+               goto out;
         }
  
         hash = page_hash(inode, offset);
@@ -302,8 +361,7 @@ repeat:
         } while(rd_res.bufsiz > 0);
  
         if (rd_res.bufsiz < 0)
-               NFS_DIREOF(inode) =
-                       (offset << PAGE_CACHE_SHIFT) + -(rd_res.bufsiz);
+               NFS_DIREOF(inode) = rd_res.cookie;
         else if (create_cookie(rd_res.cookie, offset, inode))
                 goto error;
  
@@ -318,31 +376,35 @@ error:
         goto unlock_out;
  }
  
-static __inline__ u32 nfs_do_filldir(__u32 *p, u32 doff,
+/* Seek up to dirent associated with the passed in cookie,
+ * then fill in dirents found.  Return the last cookie
+ * actually given to the user, to update the file position.
+ */
+static __inline__ u32 nfs_do_filldir(__u32 *p, u32 cookie,
                                      void *dirent, filldir_t filldir)
  {
         u32 end;
  
-       if (doff & ~PAGE_CACHE_MASK) {
-               doff = find_midpoint(p, doff);
-               p += (doff & ~PAGE_CACHE_MASK) >> 2;
-       }
         while((end = *p++) != 0) {
-               __u32 fileid = *p++;
-               __u32 len = *p++;
-               __u32 skip = NFS_NAMELEN_ALIGN(len);
-               char *name = (char *) p;
-
-               /* Skip the cookie. */
-               p = ((__u32 *) (name + skip)) + 1;
-               if (filldir(dirent, name, len, doff, fileid) < 0)
-                       goto out;
-               doff += (skip + (4 * sizeof(__u32)));
+               __u32 fileid, len, skip, this_cookie;
+               char *name;
+
+               fileid = *p++;
+               len = *p++;
+               name = (char *) p;
+               skip = NFS_NAMELEN_ALIGN(len);
+               p += (skip >> 2);
+               this_cookie = *p++;
+
+               if (this_cookie < cookie)
+                       continue;
+
+               cookie = this_cookie;
+               if (filldir(dirent, name, len, cookie, fileid) < 0)
+                       break;
         }
-       if (!*p)
-               doff = PAGE_CACHE_ALIGN(doff);
-out:
-       return doff;
+
+       return cookie;
  }
  
  /* The file offset position is represented in pure bytes, to
@@ -357,7 +419,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
         struct dentry *dentry = filp->f_dentry;
         struct inode *inode = dentry->d_inode;
         struct page *page, **hash;
-       unsigned long offset;
+       long offset;
         int res;
  
         res = nfs_revalidate_inode(NFS_DSERVER(dentry), dentry);
@@ -367,7 +429,9 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
         if (NFS_DIREOF(inode) && filp->f_pos >= NFS_DIREOF(inode))
                 return 0;
  
-       offset = filp->f_pos >> PAGE_CACHE_SHIFT;
+       if ((offset = nfs_readdir_offset(inode, filp->f_pos)) < 0)
+               goto no_dirent_page;
+
         hash = page_hash(inode, offset);
         page = __find_get_page(inode, offset, *hash);
         if (!page)
@@ -381,7 +445,7 @@ success:
         return 0;
  
  no_dirent_page:
-       page = try_to_get_dirent_page(filp, offset, 1);
+       page = try_to_get_dirent_page(filp, filp->f_pos, 1);
         if (!page)
                 goto no_page;
  
@@ -393,20 +457,39 @@ no_page:
         return -EIO;
  }
  
-/* Invalidate directory cookie caches and EOF marker
- * for an inode.
+/* Flush directory cookie and EOF caches for an inode.
+ * So we don't thrash allocating/freeing cookie tables,
+ * we keep the cookies around until the inode is
+ * deleted/reused.
+ */
+__inline__ void nfs_flush_dircache(struct inode *inode)
+{
+       struct nfs_cookie_table *p = NFS_COOKIES(inode);
+
+       while (p != NULL) {
+               int i;
+
+               for(i = 0; i < COOKIES_PER_CHUNK; i++)
+                       p->cookies[i] = 0;
+
+               p = p->next;
+       }
+       NFS_DIREOF(inode) = 0;
+}
+
+/* Free up directory cache state, this happens when
+ * nfs_delete_inode is called on an NFS directory.
   */
-__inline__ void nfs_invalidate_dircache(struct inode *inode)
+void nfs_free_dircache(struct inode *inode)
  {
         struct nfs_cookie_table *p = NFS_COOKIES(inode);
  
-       if (p != NULL) {
-               NFS_COOKIES(inode) = NULL;
-               do {    struct nfs_cookie_table *next = p->next;
-                       kmem_cache_free(nfs_cookie_cachep, p);
-                       p = next;
-               } while (p != NULL);
+       while (p != NULL) {
+               struct nfs_cookie_table *next = p->next;
+               kmem_cache_free(nfs_cookie_cachep, p);
+               p = next;
         }
+       NFS_COOKIES(inode) = NULL;
         NFS_DIREOF(inode) = 0;
  }
  
@@ -532,11 +615,11 @@ out_bad:
         /* Purge readdir caches. */
         if (dentry->d_parent->d_inode) {
                 invalidate_inode_pages(dentry->d_parent->d_inode);
-               nfs_invalidate_dircache(dentry->d_parent->d_inode);
+               nfs_flush_dircache(dentry->d_parent->d_inode);
         }
         if (inode && S_ISDIR(inode->i_mode)) {
                 invalidate_inode_pages(inode);
-               nfs_invalidate_dircache(inode);
+               nfs_flush_dircache(inode);
         }
         return 0;
  }
@@ -733,7 +816,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode)
          * Invalidate the dir cache before the operation to avoid a race.
          */
         invalidate_inode_pages(dir);
-       nfs_invalidate_dircache(dir);
+       nfs_flush_dircache(dir);
         error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
                         dentry->d_name.name, &sattr, &fhandle, &fattr);
         if (!error)
@@ -763,7 +846,7 @@ static int nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rde
         sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
  
         invalidate_inode_pages(dir);
-       nfs_invalidate_dircache(dir);
+       nfs_flush_dircache(dir);
         error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
                                 dentry->d_name.name, &sattr, &fhandle, &fattr);
         if (!error)
@@ -798,7 +881,7 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
          */
         d_drop(dentry);
         invalidate_inode_pages(dir);
-       nfs_invalidate_dircache(dir);
+       nfs_flush_dircache(dir);
         error = nfs_proc_mkdir(NFS_DSERVER(dentry), NFS_FH(dentry->d_parent),
                                 dentry->d_name.name, &sattr, &fhandle, &fattr);
         return error;
@@ -819,7 +902,7 @@ dentry->d_inode->i_count, dentry->d_inode->i_nlink);
  #endif
  
         invalidate_inode_pages(dir);
-       nfs_invalidate_dircache(dir);
+       nfs_flush_dircache(dir);
         error = nfs_proc_rmdir(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
                                 dentry->d_name.name);
  
@@ -947,7 +1030,7 @@ dentry->d_parent->d_name.name, dentry->d_name.name);
         } while(sdentry->d_inode != NULL); /* need negative lookup */
  
         invalidate_inode_pages(dir);
-       nfs_invalidate_dircache(dir);
+       nfs_flush_dircache(dir);
         error = nfs_proc_rename(NFS_SERVER(dir),
                                 NFS_FH(dentry->d_parent), dentry->d_name.name,
                                 NFS_FH(dentry->d_parent), silly);
@@ -1017,7 +1100,7 @@ inode->i_count, inode->i_nlink);
                 d_delete(dentry);
         }
         invalidate_inode_pages(dir);
-       nfs_invalidate_dircache(dir);
+       nfs_flush_dircache(dir);
         error = nfs_proc_remove(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
                                 dentry->d_name.name);
         /*
@@ -1084,7 +1167,7 @@ dentry->d_parent->d_name.name, dentry->d_name.name);
          */
         d_drop(dentry);
         invalidate_inode_pages(dir);
-       nfs_invalidate_dircache(dir);
+       nfs_flush_dircache(dir);
         error = nfs_proc_symlink(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
                                 dentry->d_name.name, symname, &sattr);
         if (!error) {
@@ -1115,7 +1198,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
          */
         d_drop(dentry);
         invalidate_inode_pages(dir);
-       nfs_invalidate_dircache(dir);
+       nfs_flush_dircache(dir);
         error = nfs_proc_link(NFS_DSERVER(old_dentry), NFS_FH(old_dentry),
                                 NFS_FH(dentry->d_parent), dentry->d_name.name);
         if (!error) {
@@ -1261,9 +1344,9 @@ new_inode->i_count, new_inode->i_nlink);
         }
  
         invalidate_inode_pages(new_dir);
-       nfs_invalidate_dircache(new_dir);
+       nfs_flush_dircache(new_dir);
         invalidate_inode_pages(old_dir);
-       nfs_invalidate_dircache(old_dir);
+       nfs_flush_dircache(old_dir);
         error = nfs_proc_rename(NFS_DSERVER(old_dentry),
                         NFS_FH(old_dentry->d_parent), old_dentry->d_name.name,
                         NFS_FH(new_dentry->d_parent), new_dentry->d_name.name);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c

index c7e684763f2afc8a1ea7aa81af83bd86249694d2..5421cebf99a8079cf0bceda607c9f615b41e61f4 100644 (file)
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -99,23 +99,28 @@ nfs_delete_inode(struct inode * inode)
         int failed;
  
         dprintk("NFS: delete_inode(%x/%ld)\n", inode->i_dev, inode->i_ino);
-       /*
-        * Flush out any pending write requests ...
-        */
-       if (NFS_WRITEBACK(inode) != NULL) {
-               unsigned long timeout = jiffies + 5*HZ;
+
+       if (S_ISDIR(inode->i_mode)) {
+               nfs_free_dircache(inode);
+       } else {
+               /*
+                * Flush out any pending write requests ...
+                */
+               if (NFS_WRITEBACK(inode) != NULL) {
+                       unsigned long timeout = jiffies + 5*HZ;
  #ifdef NFS_DEBUG_VERBOSE
  printk("nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino);
  #endif
-               nfs_inval(inode);
-               while (NFS_WRITEBACK(inode) != NULL &&
-                      time_before(jiffies, timeout)) {
-                       current->state = TASK_INTERRUPTIBLE;
-                       schedule_timeout(HZ/10);
+                       nfs_inval(inode);
+                       while (NFS_WRITEBACK(inode) != NULL &&
+                              time_before(jiffies, timeout)) {
+                               current->state = TASK_INTERRUPTIBLE;
+                               schedule_timeout(HZ/10);
+                       }
+                       current->state = TASK_RUNNING;
+                       if (NFS_WRITEBACK(inode) != NULL)
+                               printk("NFS: Arghhh, stuck RPC requests!\n");
                 }
-               current->state = TASK_RUNNING;
-               if (NFS_WRITEBACK(inode) != NULL)
-                       printk("NFS: Arghhh, stuck RPC requests!\n");
         }
  
         failed = nfs_check_failed_request(inode);
@@ -433,7 +438,7 @@ nfs_zap_caches(struct inode *inode)
  
         invalidate_inode_pages(inode);
         if (S_ISDIR(inode->i_mode))
-               nfs_invalidate_dircache(inode);
+               nfs_flush_dircache(inode);
  }
  
  /*
@@ -477,8 +482,6 @@ nfs_fill_inode(struct inode *inode, struct nfs_fattr *fattr)
                 inode->i_size  = fattr->size;
                 inode->i_mtime = fattr->mtime.seconds;
                 NFS_OLDMTIME(inode) = fattr->mtime.seconds;
-               NFS_COOKIES(inode) = NULL;
-               NFS_WRITEBACK(inode) = NULL;
         }
         nfs_refresh_inode(inode, fattr);
  }
diff --git a/fs/pipe.c b/fs/pipe.c

index 90b5df368d4f190987d3148b36a8cb5f1aae7dca..dd4f6cd196656b3a6f37699457f11226f0af9c73 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -102,9 +102,7 @@ static ssize_t pipe_write(struct file * filp, const char * buf,
                 free = count;
         else
                 free = 1; /* can't do it atomically, wait for any free space */
-       up(&inode->i_sem);
-       if (down_interruptible(&inode->i_atomic_write)) {
-               down(&inode->i_sem);
+       if (down_interruptible(&inode->i_sem)) {
                 return -ERESTARTSYS;
         }
         while (count>0) {
@@ -145,8 +143,7 @@ static ssize_t pipe_write(struct file * filp, const char * buf,
         inode->i_ctime = inode->i_mtime = CURRENT_TIME;
         mark_inode_dirty(inode);
  errout:
-       up(&inode->i_atomic_write);
-       down(&inode->i_sem);
+       up(&inode->i_sem);
         return written ? written : err;
  }
  
@@ -254,6 +251,7 @@ static int pipe_release(struct inode * inode)
                 inode->i_pipe = NULL;
                 free_page((unsigned long) info->base);
                 kfree(info);
+               return 0;
         }
         wake_up_interruptible(&PIPE_WAIT(*inode));
         return 0;
diff --git a/fs/read_write.c b/fs/read_write.c

index 7b9bf0bf759393d09479411abad843d2b6ac87ef..c7ea90a69d757396e5662e9858be22ce941bdb32 100644 (file)
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -166,9 +166,7 @@ asmlinkage ssize_t sys_write(unsigned int fd, const char * buf, size_t count)
         if (!file->f_op || !(write = file->f_op->write))
                 goto out;
  
-       down(&inode->i_sem);
         ret = write(file, buf, count, &file->f_pos);
-       up(&inode->i_sem);
  out:
         fput(file);
  bad_file:
@@ -304,9 +302,7 @@ asmlinkage ssize_t sys_writev(unsigned long fd, const struct iovec * vector,
         if (!file)
                 goto bad_file;
         if (file->f_op && file->f_op->write && (file->f_mode & FMODE_WRITE)) {
-               down(&file->f_dentry->d_inode->i_sem);
                 ret = do_readv_writev(VERIFY_READ, file, vector, count);
-               up(&file->f_dentry->d_inode->i_sem);
         }
         fput(file);
  
@@ -376,10 +372,7 @@ asmlinkage ssize_t sys_pwrite(unsigned int fd, const char * buf,
         if (pos < 0)
                 goto out;
  
-       down(&file->f_dentry->d_inode->i_sem);
         ret = write(file, buf, count, &pos);
-       up(&file->f_dentry->d_inode->i_sem);
-
  out:
         fput(file);
  bad_file:
diff --git a/fs/sysv/file.c b/fs/sysv/file.c

index d60be8fa5ebf50ad70a9263a19ca994de46a2579..9e806e4d1375da294f6c8d8b8035363b9926826e 100644 (file)
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -33,7 +33,51 @@
  #include <linux/fs.h>
  #include <linux/sysv_fs.h>
  
-static ssize_t sysv_file_write(struct file *, const char *, size_t, loff_t *);
+static int sysv_writepage (struct file * file, struct page * page)
+{
+       struct dentry *dentry = file->f_dentry;
+       struct inode *inode = dentry->d_inode;
+       unsigned long block;
+       int *p, nr[PAGE_SIZE/512];      /* nr[]: block numbers later handed to brw_page() */
+       int i, err, created;            /* err/created are only out-parameters here; never read */
+       struct buffer_head *bh;
+
+       i = PAGE_SIZE >> inode->i_sb->sv_block_size_bits;       /* number of loop iterations */
+       block = page->offset >> inode->i_sb->sv_block_size_bits;        /* page offset converted to a block index */
+       p = nr;
+       bh = page->buffers;
+       do {
+               if (bh && bh->b_blocknr)
+                       *p = bh->b_blocknr;     /* reuse the number already recorded in the buffer */
+               else
+                       *p = sysv_getblk_block (inode, block, 1, &err, &created);       /* create=1 */
+               if (!*p)
+                       return -EIO;    /* a zero block number is treated as failure */
+               i--;
+               block++;
+               p++;
+               if (bh)
+                       bh = bh->b_this_page;   /* follow the b_this_page link to the next buffer */
+       } while (i > 0);
+
+       /* IO start */
+       brw_page(WRITE, page, inode->i_dev, nr, inode->i_sb->sv_block_size, 1);
+       return 0;
+}
+
+static long sysv_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf)
+{
+       return block_write_one_page(file, page, offset, bytes, buf, sysv_getblk_block); /* forwards every argument, adding sysv_getblk_block as the final one */
+}
+
+/*
+ * Write to a file (through the page cache) by delegating to generic_file_write().
+ */
+static ssize_t
+sysv_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
+{
+       return generic_file_write(file, buf, count, ppos, sysv_write_one_page); /* sysv_write_one_page is passed as the last argument */
+}
  
  /*
   * We have mostly NULLs here: the current defaults are OK for
@@ -41,7 +85,7 @@ static ssize_t sysv_file_write(struct file *, const char *, size_t, loff_t *);
   */
  static struct file_operations sysv_file_operations = {
         NULL,                   /* lseek - default */
-       sysv_file_read,         /* read */
+       generic_file_read,      /* read */
         sysv_file_write,        /* write */
         NULL,                   /* readdir - bad */
         NULL,                   /* poll - default */
@@ -50,7 +94,10 @@ static struct file_operations sysv_file_operations = {
         NULL,                   /* no special open is needed */
         NULL,                   /* flush */
         NULL,                   /* release */
-       sysv_sync_file          /* fsync */
+       sysv_sync_file,         /* fsync */
+       NULL,                   /* fasync */
+       NULL,                   /* check_media_change */
+       NULL                    /* revalidate */
  };
  
  struct inode_operations sysv_file_inode_operations = {
@@ -67,208 +114,12 @@ struct inode_operations sysv_file_inode_operations = {
         NULL,                   /* readlink */
         NULL,                   /* follow_link */
         generic_readpage,       /* readpage */
-       NULL,                   /* writepage */
+       sysv_writepage,         /* writepage */
         sysv_bmap,              /* bmap */
         sysv_truncate,          /* truncate */
-       NULL                    /* permission */
+       NULL,                   /* permission */
+       NULL,                   /* smap */
+       NULL,                   /* updatepage */
+       NULL,                   /* revalidate */
+       generic_block_flushpage,/* flushpage */
  };
-
-ssize_t sysv_file_read(struct file * filp, char * buf, 
-                      size_t count, loff_t *ppos)
-{
-       struct inode * inode = filp->f_dentry->d_inode;
-       struct super_block * sb = inode->i_sb;
-       ssize_t read,left,chars;
-       size_t block;
-       ssize_t blocks, offset;
-       int bhrequest, uptodate;
-       struct buffer_head ** bhb, ** bhe;
-       struct buffer_head * bhreq[NBUF];
-       struct buffer_head * buflist[NBUF];
-       size_t size;
-
-       if (!inode) {
-               printk("sysv_file_read: inode = NULL\n");
-               return -EINVAL;
-       }
-       if (!S_ISREG(inode->i_mode)) {
-               printk("sysv_file_read: mode = %07o\n",inode->i_mode);
-               return -EINVAL;
-       }
-       offset = *ppos;
-       size = inode->i_size;
-       if (offset > size)
-               left = 0;
-       else
-               left = size - offset;
-       if (left > count)
-               left = count;
-       if (left <= 0)
-               return 0;
-       read = 0;
-       block = offset >> sb->sv_block_size_bits;
-       offset &= sb->sv_block_size_1;
-       size = (size + sb->sv_block_size_1) >> sb->sv_block_size_bits;
-       blocks = (left + offset + sb->sv_block_size_1) >> sb->sv_block_size_bits;
-       bhb = bhe = buflist;
-       if (filp->f_reada) {
-               blocks += read_ahead[MAJOR(inode->i_dev)] >> (sb->sv_block_size_bits - 9);
-               if (block + blocks > size)
-                       blocks = size - block;
-       }
-
-       /* We do this in a two stage process.  We first try to request
-          as many blocks as we can, then we wait for the first one to
-          complete, and then we try to wrap up as many as are actually
-          done.  This routine is rather generic, in that it can be used
-          in a filesystem by substituting the appropriate function in
-          for getblk.
-
-          This routine is optimized to make maximum use of the various
-          buffers and caches.
-        */
-
-       do {
-               bhrequest = 0;
-               uptodate = 1;
-               while (blocks) {
-                       --blocks;
-                       *bhb = sysv_getblk(inode, block++, 0);
-                       if (*bhb && !buffer_uptodate(*bhb)) {
-                               uptodate = 0;
-                               bhreq[bhrequest++] = *bhb;
-                       }
-
-                       if (++bhb == &buflist[NBUF])
-                               bhb = buflist;
-
-                       /* If the block we have on hand is uptodate, go ahead
-                          and complete processing. */
-                       if (uptodate)
-                               break;
-                       if (bhb == bhe)
-                               break;
-               }
-
-               /* Now request them all */
-               if (bhrequest)
-                       ll_rw_block(READ, bhrequest, bhreq);
-
-               do { /* Finish off all I/O that has actually completed */
-                       if (*bhe) {
-                               wait_on_buffer(*bhe);
-                               if (!buffer_uptodate(*bhe)) {   /* read error? */
-                                       brelse(*bhe);
-                                       if (++bhe == &buflist[NBUF])
-                                               bhe = buflist;
-                                       left = 0;
-                                       break;
-                               }
-                       }
-                       if (left < sb->sv_block_size - offset)
-                               chars = left;
-                       else
-                               chars = sb->sv_block_size - offset;
-                       *ppos += chars;
-                       left -= chars;
-                       read += chars;
-                       if (*bhe) {
-                               copy_to_user(buf,offset+(*bhe)->b_data,chars);
-                               brelse(*bhe);
-                               buf += chars;
-                       } else {
-                               while (chars-- > 0)
-                                       put_user(0,buf++);
-                       }
-                       offset = 0;
-                       if (++bhe == &buflist[NBUF])
-                               bhe = buflist;
-               } while (left > 0 && bhe != bhb && (!*bhe || !buffer_locked(*bhe)));
-       } while (left > 0);
-
-/* Release the read-ahead blocks */
-       while (bhe != bhb) {
-               brelse(*bhe);
-               if (++bhe == &buflist[NBUF])
-                       bhe = buflist;
-       };
-       if (!read)
-               return -EIO;
-       filp->f_reada = 1;
-       if (!IS_RDONLY(inode)) {
-               inode->i_atime = CURRENT_TIME;
-               mark_inode_dirty(inode);
-       }
-       return read;
-}
-
-static ssize_t sysv_file_write(struct file * filp, const char * buf,
-                              size_t count, loff_t *ppos)
-{
-       struct inode * inode = filp->f_dentry->d_inode;
-       struct super_block * sb = inode->i_sb;
-       off_t pos;
-       ssize_t written, c;
-       struct buffer_head * bh;
-       char * p;
-
-       if (!inode) {
-               printk("sysv_file_write: inode = NULL\n");
-               return -EINVAL;
-       }
-       if (!S_ISREG(inode->i_mode)) {
-               printk("sysv_file_write: mode = %07o\n",inode->i_mode);
-               return -EINVAL;
-       }
-/*
- * OK, append may not work when many processes are writing at the same time
- * but so what. That way leads to madness anyway.
- * But we need to protect against simultaneous truncate as we may end up
- * writing our data into blocks that have meanwhile been incorporated into
- * the freelist, thereby trashing the freelist.
- */
-       if (filp->f_flags & O_APPEND)
-               pos = inode->i_size;
-       else
-               pos = *ppos;
-       written = 0;
-       while (written<count) {
-               bh = sysv_getblk (inode, pos >> sb->sv_block_size_bits, 1);
-               if (!bh) {
-                       if (!written)
-                               written = -ENOSPC;
-                       break;
-               }
-               c = sb->sv_block_size - (pos & sb->sv_block_size_1);
-               if (c > count-written)
-                       c = count-written;
-               if (c != sb->sv_block_size && !buffer_uptodate(bh)) {
-                       ll_rw_block(READ, 1, &bh);
-                       wait_on_buffer(bh);
-                       if (!buffer_uptodate(bh)) {
-                               brelse(bh);
-                               if (!written)
-                                       written = -EIO;
-                               break;
-                       }
-               }
-               /* now either c==sb->sv_block_size or buffer_uptodate(bh) */
-               p = (pos & sb->sv_block_size_1) + bh->b_data;
-               copy_from_user(p, buf, c);
-               update_vm_cache(inode, pos, p, c);
-               pos += c;
-               if (pos > inode->i_size) {
-                       inode->i_size = pos;
-                       mark_inode_dirty(inode);
-               }
-               written += c;
-               buf += c;
-               mark_buffer_uptodate(bh, 1);
-               mark_buffer_dirty(bh, 0);
-               brelse(bh);
-       }
-       inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-       *ppos = pos;
-       mark_inode_dirty(inode);
-       return written;
-}
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c

index f8d508c3de5a4e585e3a3e0cdccd0c62b3549e26..d335b5b501ec352398252d0b5b2d2c92297b3ce6 100644 (file)
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -657,7 +657,8 @@ int sysv_bmap(struct inode * inode,int block_nr)
  
  /* Access selected blocks of regular files (or directories) */
  
-static struct buffer_head * inode_getblk(struct inode * inode, int nr, int create)
+static struct buffer_head * inode_getblk(struct inode * inode, int nr, int create,
+                                        int metadata, int *phys_block, int *created)
  {
         struct super_block *sb;
         u32 tmp;
@@ -669,31 +670,48 @@ static struct buffer_head * inode_getblk(struct inode * inode, int nr, int creat
  repeat:
         tmp = *p;
         if (tmp) {
-               result = sv_getblk(sb, inode->i_dev, tmp);
-               if (tmp == *p)
-                       return result;
-               brelse(result);
-               goto repeat;
+               if (metadata) {
+                       result = sv_getblk(sb, inode->i_dev, tmp);
+                       if (tmp == *p)
+                               return result;
+                       brelse(result);
+                       goto repeat;
+               } else {
+                       *phys_block = tmp;
+                       return NULL;
+               }
         }
         if (!create)
                 return NULL;
         tmp = sysv_new_block(sb);
         if (!tmp)
                 return NULL;
-       result = sv_getblk(sb, inode->i_dev, tmp);
-       if (*p) {
-               sysv_free_block(sb,tmp);
-               brelse(result);
-               goto repeat;
+       if (metadata) {
+               result = sv_getblk(sb, inode->i_dev, tmp);
+               if (*p) {
+                       sysv_free_block(sb, tmp);
+                       brelse(result);
+                       goto repeat;
+               }
+       } else {
+               if (*p) {
+                       sysv_free_block(sb, tmp);
+                       goto repeat;
+               }
+               *phys_block = tmp;
+               result = NULL;
+               *created = 1;
         }
         *p = tmp;
+
         inode->i_ctime = CURRENT_TIME;
         mark_inode_dirty(inode);
         return result;
  }
  
  static struct buffer_head * block_getblk(struct inode * inode,
-       struct buffer_head * bh, int nr, int create)
+       struct buffer_head * bh, int nr, int create,
+       int metadata, int *phys_block, int *created)
  {
         struct super_block *sb;
         u32 tmp, block;
@@ -717,13 +735,19 @@ repeat:
         if (sb->sv_convert)
                 block = from_coh_ulong(block);
         if (tmp) {
-               result = sv_getblk(sb, bh->b_dev, block);
-               if (tmp == *p) {
+               if (metadata) {
+                       result = sv_getblk(sb, bh->b_dev, block);
+                       if (tmp == *p) {
+                               brelse(bh);
+                               return result;
+                       }
+                       brelse(result);
+                       goto repeat;
+               } else {
+                       *phys_block = tmp;
                         brelse(bh);
-                       return result;
+                       return NULL;
                 }
-               brelse(result);
-               goto repeat;
         }
         if (!create) {
                 brelse(bh);
@@ -734,11 +758,17 @@ repeat:
                 brelse(bh);
                 return NULL;
         }
-       result = sv_getblk(sb, bh->b_dev, block);
-       if (*p) {
-               sysv_free_block(sb,block);
-               brelse(result);
-               goto repeat;
+       if (metadata) {
+               result = sv_getblk(sb, bh->b_dev, block);
+               if (*p) {
+                       sysv_free_block(sb,block);
+                       brelse(result);
+                       goto repeat;
+               }
+       } else {
+               *phys_block = tmp;
+               result = NULL;
+               *created = 1;
         }
         *p = (sb->sv_convert ? to_coh_ulong(block) : block);
         mark_buffer_dirty(bh, 1);
@@ -746,37 +776,74 @@ repeat:
         return result;
  }
  
-struct buffer_head * sysv_getblk(struct inode * inode, unsigned int block, int create)
+int sysv_getblk_block(struct inode *inode, long block, int create,
+                     int *err, int *created)
  {
-       struct super_block * sb = inode->i_sb;
-       struct buffer_head * bh;
+       struct super_block *sb = inode->i_sb;
+       struct buffer_head *bh, *tmp;
+       int phys_block = 0;     /* 0 = "no block"; a failed lookup may leave this unwritten */
  
-       if (block < 10)
-               return inode_getblk(inode,block,create);
+       *err = -EIO;    /* value seen by the caller on the two early returns below */
+       if (block < 0) {
+               printk("sysv_getblk: block<0");
+               return 0;
+       }
+       if (block > sb->sv_ind_per_block_3) {   /* NOTE(review): tested before the offsets below are subtracted */
+               printk("sysv_getblk: block>big");
+               return 0;
+       }
+       if (block < 10) {       /* indices 0..9 are looked up directly via inode_getblk() */
+               tmp = inode_getblk(inode, block, create,
+                                  0, &phys_block, created);
+               goto out;
+       }
         block -= 10;
         if (block < sb->sv_ind_per_block) {
-               bh = inode_getblk(inode,10,create);
-               return block_getblk(inode, bh, block, create);
+               bh = inode_getblk(inode, 10, create, 1, NULL, NULL);    /* metadata=1: returns a buffer head */
+               tmp = block_getblk(inode, bh, block, create,
+                                  0, &phys_block, created);
+               goto out;
         }
         block -= sb->sv_ind_per_block;
         if (block < sb->sv_ind_per_block_2) {
-               bh = inode_getblk(inode,11,create);
-               bh = block_getblk(inode, bh, block >> sb->sv_ind_per_block_bits, create);
-               return block_getblk(inode, bh, block & sb->sv_ind_per_block_1, create);
+               bh = inode_getblk(inode, 11, create, 1, NULL, NULL);
+               bh = block_getblk(inode, bh, block >> sb->sv_ind_per_block_bits, create,
+                                 1, NULL, NULL);
+               tmp = block_getblk(inode, bh, block & sb->sv_ind_per_block_1, create,
+                                  0, &phys_block, created);
+               goto out;
         }
         block -= sb->sv_ind_per_block_2;
-       if (block < sb->sv_ind_per_block_3) {
-               bh = inode_getblk(inode,12,create);
-               bh = block_getblk(inode, bh, block >> sb->sv_ind_per_block_2_bits, create);
-               bh = block_getblk(inode, bh, (block >> sb->sv_ind_per_block_bits) & sb->sv_ind_per_block_1, create);
-               return block_getblk(inode, bh, block & sb->sv_ind_per_block_1, create);
-       }
-       if ((int)block<0) {
-               printk("sysv_getblk: block<0");
-               return NULL;
+       bh = inode_getblk(inode, 12, create, 1, NULL, NULL);
+       bh = block_getblk(inode, bh, block >> sb->sv_ind_per_block_2_bits, create,
+                         1, NULL, NULL);
+       bh = block_getblk(inode, bh,
+                         (block >> sb->sv_ind_per_block_bits) & sb->sv_ind_per_block_1,
+                         create, 1, NULL, NULL);
+       tmp = block_getblk(inode, bh, block & sb->sv_ind_per_block_1, create,
+                          0, &phys_block, created);
+
+out:
+       *err = 0;
+       return phys_block;      /* 0 when the lookup stored no block number */
+}
+
+struct buffer_head *sysv_getblk (struct inode *inode, unsigned int block, int create)
+{
+       struct buffer_head *tmp = NULL; /* stays NULL when no physical block is found */
+       int phys_block;
+       int err, created = 0;   /* must start at 0: the visible lookup paths only ever store 1 */
+
+       phys_block = sysv_getblk_block(inode, block, create, &err, &created);
+       if (phys_block) {
+               tmp = getblk(inode->i_dev, phys_block, BLOCK_SIZE);     /* NOTE(review): BLOCK_SIZE, not sb->sv_block_size -- confirm */
+               if (created) {  /* newly allocated block: return it zero-filled, uptodate and dirty */
+                       memset(tmp->b_data, 0, BLOCK_SIZE);
+                       mark_buffer_uptodate(tmp, 1);
+                       mark_buffer_dirty(tmp, 1);
+               }
         }
-       printk("sysv_getblk: block>big");
-       return NULL;
+       return tmp;
  }
  
  struct buffer_head * sysv_file_bread(struct inode * inode, int block, int create)
diff --git a/fs/sysv/truncate.c b/fs/sysv/truncate.c

index c318648a99bf1380436a7332daa356d55f096fae..a8c0e074561c60ff8f37688e3bfc9e45869c47e7 100644 (file)
--- a/fs/sysv/truncate.c
+++ b/fs/sysv/truncate.c
@@ -35,6 +35,9 @@
   * general case (size = XXX). I hope.
   */
  
+#define DATA_BUFFER_USED(bh) \
+       (((bh)->b_count > 1) || buffer_locked(bh))      /* true when b_count exceeds 1 or the buffer is locked */
+
  /* We throw away any data beyond inode->i_size. */
  
  static int trunc_direct(struct inode * inode)
@@ -58,7 +61,7 @@ repeat:
                         brelse(bh);
                         goto repeat;
                 }
-               if ((bh && bh->b_count != 1) || (block != *p)) {
+               if ((bh && DATA_BUFFER_USED(bh)) || (block != *p)) {
                         retry = 1;
                         brelse(bh);
                         continue;
@@ -115,7 +118,7 @@ repeat:
                         brelse(bh);
                         goto repeat;
                 }
-               if ((bh && bh->b_count != 1) || (tmp != *ind)) {
+               if ((bh && DATA_BUFFER_USED(bh)) || (tmp != *ind)) {
                         retry = 1;
                         brelse(bh);
                         continue;
@@ -128,7 +131,7 @@ repeat:
         for (i = 0; i < sb->sv_ind_per_block; i++)
                 if (((sysv_zone_t *) indbh->b_data)[i])
                         goto done;
-       if ((indbh->b_count != 1) || (indtmp != *p)) {
+       if (DATA_BUFFER_USED(indbh) || (indtmp != *p)) {
                 brelse(indbh);
                 return 1;
         }
@@ -185,7 +188,7 @@ static int trunc_dindirect(struct inode * inode, unsigned long offset, sysv_zone
         for (i = 0; i < sb->sv_ind_per_block; i++)
                 if (((sysv_zone_t *) indbh->b_data)[i])
                         goto done;
-       if ((indbh->b_count != 1) || (indtmp != *p)) {
+       if (DATA_BUFFER_USED(indbh) || (indtmp != *p)) {
                 brelse(indbh);
                 return 1;
         }
@@ -242,7 +245,7 @@ static int trunc_tindirect(struct inode * inode, unsigned long offset, sysv_zone
         for (i = 0; i < sb->sv_ind_per_block; i++)
                 if (((sysv_zone_t *) indbh->b_data)[i])
                         goto done;
-       if ((indbh->b_count != 1) || (indtmp != *p)) {
+       if (DATA_BUFFER_USED(indbh) || (indtmp != *p)) {
                 brelse(indbh);
                 return 1;
         }
diff --git a/fs/ufs/file.c b/fs/ufs/file.c

index 7e94bfd1c8acc3e0b4c58409a698d6e278ea4837..57db16baefa01ad0dadc18b05a0720ea4ad32f1c 100644 (file)
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -41,52 +41,6 @@
  #define MIN(a,b) (((a)<(b))?(a):(b))
  #define MAX(a,b) (((a)>(b))?(a):(b))
  
-static long long ufs_file_lseek(struct file *, long long, int);
-static ssize_t ufs_file_write (struct file *, const char *, size_t, loff_t *);
-static int ufs_release_file (struct inode *, struct file *);
-
-/*
- * We have mostly NULL's here: the current defaults are ok for
- * the ufs filesystem.
- */
-static struct file_operations ufs_file_operations = {
-       ufs_file_lseek, /* lseek */
-       generic_file_read,      /* read */
-       ufs_file_write,         /* write */
-       NULL,                   /* readdir - bad */
-       NULL,                   /* poll - default */
-       NULL,                   /* ioctl */
-       generic_file_mmap,      /* mmap */
-       NULL,                   /* no special open is needed */
-       NULL,                   /* flush */
-       ufs_release_file,       /* release */
-       NULL,                   /* fsync */
-       NULL,                   /* fasync */
-       NULL,                   /* check_media_change */
-       NULL                    /* revalidate */
-};
-
-struct inode_operations ufs_file_inode_operations = {
-       &ufs_file_operations,/* default file operations */
-       NULL,                   /* create */
-       NULL,                   /* lookup */
-       NULL,                   /* link */
-       NULL,                   /* unlink */
-       NULL,                   /* symlink */
-       NULL,                   /* mkdir */
-       NULL,                   /* rmdir */
-       NULL,                   /* mknod */
-       NULL,                   /* rename */
-       NULL,                   /* readlink */
-       NULL,                   /* follow_link */
-       generic_readpage,       /* readpage */
-       NULL,                   /* writepage */
-       ufs_bmap,               /* bmap */
-       ufs_truncate,           /* truncate */
-       NULL,                   /* permission */
-       NULL                    /* smap */
-};
-
  /*
   * Make sure the offset never goes beyond the 32-bit mark..
   */
@@ -133,139 +87,49 @@ static inline void remove_suid(struct inode *inode)
         }
  }
  
-static ssize_t ufs_file_write (
-       struct file * filp,
-       const char * buf,
-       size_t count,
-       loff_t *ppos )
+static int ufs_writepage (struct file *file, struct page *page)
  {
-       struct inode * inode = filp->f_dentry->d_inode;
-       __u32 pos;
-       long block;
-       int offset;
-       int written, c;
-       struct buffer_head * bh, *bufferlist[NBUF];
-       struct super_block * sb;
-       int err;
-       int i,buffercount,write_error;
-
-       /* POSIX: mtime/ctime may not change for 0 count */
-       if (!count)
-               return 0;
-       write_error = buffercount = 0;
-       if (!inode)
-               return -EINVAL;
-       sb = inode->i_sb;
-       if (sb->s_flags & MS_RDONLY)
-               /*
-                * This fs has been automatically remounted ro because of errors
-                */
-               return -ENOSPC;
-
-       if (!S_ISREG(inode->i_mode)) {
-               ufs_warning (sb, "ufs_file_write", "mode = %07o",
-                             inode->i_mode);
-               return -EINVAL;
-       }
-       remove_suid(inode);
-
-       if (filp->f_flags & O_APPEND)
-               pos = inode->i_size;
-       else {
-               pos = *ppos;
-               if (pos != *ppos)
-                       return -EINVAL;
-       }
-
-       /* Check for overflow.. */
-       if (pos > (__u32) (pos + count)) {
-               count = ~pos; /* == 0xFFFFFFFF - pos */
-               if (!count)
-                       return -EFBIG;
-       }
-
-       /*
-        * If a file has been opened in synchronous mode, we have to ensure
-        * that meta-data will also be written synchronously.  Thus, we
-        * set the i_osync field.  This field is tested by the allocation
-        * routines.
-        */
-       if (filp->f_flags & O_SYNC)
-               inode->u.ufs_i.i_osync++;
-       block = pos >> sb->s_blocksize_bits;
-       offset = pos & (sb->s_blocksize - 1);
-       c = sb->s_blocksize - offset;
-       written = 0;
+       struct dentry *dentry = file->f_dentry;
+       struct inode *inode = dentry->d_inode;
+       unsigned long block;
+       int *p, nr[PAGE_SIZE/512];
+       int i, err, created;
+       struct buffer_head *bh;
+
+       i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
+       block = page->offset >> inode->i_sb->s_blocksize_bits;
+       p = nr;
+       bh = page->buffers;
         do {
-               bh = ufs_getfrag (inode, block, 1, &err);
-               if (!bh) {
-                       if (!written)
-                               written = err;
-                       break;
-               }
-               if (c > count)
-                       c = count;
-               if (c != sb->s_blocksize && !buffer_uptodate(bh)) {
-                       ll_rw_block (READ, 1, &bh);
-                       wait_on_buffer (bh);
-                       if (!buffer_uptodate(bh)) {
-                               brelse (bh);
-                               if (!written)
-                                       written = -EIO;
-                               break;
-                       }
-               }
-               c -= copy_from_user (bh->b_data + offset, buf, c);
-               if (!c) {
-                       brelse(bh);
-                       if (!written)
-                               written = -EFAULT;
-                       break;
-               }
-               update_vm_cache(inode, pos, bh->b_data + offset, c);
-               pos += c;
-               written += c;
-               buf += c;
-               count -= c;
-               mark_buffer_uptodate(bh, 1);
-               mark_buffer_dirty(bh, 0);
-               if (filp->f_flags & O_SYNC)
-                       bufferlist[buffercount++] = bh;
+               if (bh && bh->b_blocknr)
+                       *p = bh->b_blocknr;
                 else
-                       brelse(bh);
-               if (buffercount == NBUF){
-                       ll_rw_block(WRITE, buffercount, bufferlist);
-                       for(i=0; i<buffercount; i++){
-                               wait_on_buffer(bufferlist[i]);
-                               if (!buffer_uptodate(bufferlist[i]))
-                                       write_error=1;
-                               brelse(bufferlist[i]);
-                       }
-                       buffercount=0;
-               }
-               if (write_error)
-                       break;
+                       *p = ufs_getfrag_block(inode, block, 1, &err, &created);
+               if (!*p)
+                       return -EIO;
+               i--;
                 block++;
-               offset = 0;
-               c = sb->s_blocksize;
-       } while (count);
-       if (buffercount){
-               ll_rw_block(WRITE, buffercount, bufferlist);
-               for (i=0; i<buffercount; i++){
-                       wait_on_buffer(bufferlist[i]);
-                       if (!buffer_uptodate(bufferlist[i]))
-                               write_error=1;
-                       brelse(bufferlist[i]);
-               }
-       }               
-       if (pos > inode->i_size)
-               inode->i_size = pos;
-       if (filp->f_flags & O_SYNC)
-               inode->u.ufs_i.i_osync--;
-       inode->i_ctime = inode->i_mtime = CURRENT_TIME;
-       *ppos = pos;
-       mark_inode_dirty(inode);
-       return written;
+               p++;
+               if (bh)
+                       bh = bh->b_this_page;
+       } while (i > 0);
+
+       brw_page(WRITE, page, inode->i_dev, nr, inode->i_sb->s_blocksize, 1);
+       return 0;
+}
+
+static long ufs_write_one_page(struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char *buf)
+{ /* Page-write callback: forwards to block_write_one_page() with ufs_getfrag_block as the block lookup. */
+       return block_write_one_page(file, page, offset, bytes, buf, ufs_getfrag_block);
+}
+
+/*
+ * Write to a file through the page cache: generic_file_write() gets ufs_write_one_page() as its page-write callback.
+ */
+static ssize_t
+ufs_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
+{
+       return generic_file_write(file, buf, count, ppos, ufs_write_one_page);
  }
  
  /*
@@ -277,3 +141,48 @@ static int ufs_release_file (struct inode * inode, struct file * filp)
  {
         return 0;
  }
+
+/*
+ * We have mostly NULL's here: the current defaults are ok for
+ * the ufs filesystem.
+ */
+static struct file_operations ufs_file_operations = {
+       ufs_file_lseek, /* lseek */
+       generic_file_read,      /* read */
+       ufs_file_write,         /* write */
+       NULL,                   /* readdir - bad */
+       NULL,                   /* poll - default */
+       NULL,                   /* ioctl */
+       generic_file_mmap,      /* mmap */
+       NULL,                   /* no special open is needed */
+       NULL,                   /* flush */
+       ufs_release_file,       /* release */
+       NULL,                   /* fsync */
+       NULL,                   /* fasync */
+       NULL,                   /* check_media_change */
+       NULL                    /* revalidate */
+};
+
+struct inode_operations ufs_file_inode_operations = {
+       &ufs_file_operations,/* default file operations */
+       NULL,                   /* create */
+       NULL,                   /* lookup */
+       NULL,                   /* link */
+       NULL,                   /* unlink */
+       NULL,                   /* symlink */
+       NULL,                   /* mkdir */
+       NULL,                   /* rmdir */
+       NULL,                   /* mknod */
+       NULL,                   /* rename */
+       NULL,                   /* readlink */
+       NULL,                   /* follow_link */
+       generic_readpage,       /* readpage */
+       ufs_writepage,          /* writepage */
+       ufs_bmap,               /* bmap */
+       ufs_truncate,           /* truncate */
+       NULL,                   /* permission */
+       NULL,                   /* smap */
+       NULL,                   /* updatepage */
+       NULL,                   /* revalidate */
+       generic_block_flushpage,/* flushpage */
+};
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c

index a5a51bac52526daddb7ccbc71b7c64fc5469682d..636b0aabd551a004ec1f7290344a163f0644e1b7 100644 (file)
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -175,7 +175,7 @@ int ufs_bmap (struct inode * inode, int fragment)
  
  static struct buffer_head * ufs_inode_getfrag (struct inode * inode, 
         unsigned fragment, unsigned new_fragment, int create, 
-       unsigned required, int * err )
+       unsigned required, int *err, int metadata, int *phys_block, int *created)
  {
         struct super_block * sb;
         struct ufs_sb_private_info * uspi;
@@ -201,13 +201,19 @@ repeat:
         tmp = SWAB32(*p);
         lastfrag = inode->u.ufs_i.i_lastfrag;
         if (tmp && fragment < lastfrag) {
-               result = getblk (sb->s_dev, uspi->s_sbbase + tmp + blockoff, sb->s_blocksize);
-               if (tmp == SWAB32(*p)) {
-                       UFSD(("EXIT, result %u\n", tmp + blockoff))
-                       return result;
+               if (metadata) {
+                       result = getblk (sb->s_dev, uspi->s_sbbase + tmp + blockoff,
+                                        sb->s_blocksize);
+                       if (tmp == SWAB32(*p)) {
+                               UFSD(("EXIT, result %u\n", tmp + blockoff))
+                                       return result;
+                       }
+                       brelse (result);
+                       goto repeat;
+               } else {
+                       *phys_block = tmp;
+                       return NULL;
                 }
-               brelse (result);
-               goto repeat;
         }
         *err = -EFBIG;
         if (!create)
@@ -269,7 +275,20 @@ repeat:
                 else
                         return NULL;
         }
-       result = getblk (inode->i_dev, tmp + blockoff, sb->s_blocksize);
+
+       /* The nullification of fragments done in ufs/balloc.c is
+        * something I don't have the stomach to move into here right
+        * now. -DaveM
+        */
+       if (metadata) {
+               result = getblk (inode->i_dev, tmp + blockoff, sb->s_blocksize);
+       } else {
+               *phys_block = tmp;
+               result = NULL;
+               *err = 0;
+               *created = 1;
+       }
+
         inode->i_ctime = CURRENT_TIME;
         if (IS_SYNC(inode))
                 ufs_sync_inode (inode);
@@ -280,7 +299,7 @@ repeat:
  
  static struct buffer_head * ufs_block_getfrag (struct inode * inode,
         struct buffer_head * bh, unsigned fragment, unsigned new_fragment, 
-       int create, unsigned blocksize, int * err)
+       int create, unsigned blocksize, int * err, int metadata, int *phys_block, int *created)
  {
         struct super_block * sb;
         struct ufs_sb_private_info * uspi;
@@ -312,19 +331,36 @@ static struct buffer_head * ufs_block_getfrag (struct inode * inode,
  repeat:
         tmp = SWAB32(*p);
         if (tmp) {
-               result = getblk (bh->b_dev, uspi->s_sbbase + tmp + blockoff, sb->s_blocksize);
-               if (tmp == SWAB32(*p)) {
+               if (metadata) {
+                       result = getblk (bh->b_dev, uspi->s_sbbase + tmp + blockoff,
+                                        sb->s_blocksize);
+                       if (tmp == SWAB32(*p)) {
+                               brelse (bh);
+                               UFSD(("EXIT, result %u\n", tmp + blockoff))
+                               return result;
+                       }
+                       brelse (result);
+                       goto repeat;
+               } else {
+                       *phys_block = tmp;
                         brelse (bh);
-                       UFSD(("EXIT, result %u\n", tmp + blockoff))
-                       return result;
+                       return NULL;
                 }
-               brelse (result);
-               goto repeat;
         }
-       if (!create || new_fragment >= (current->rlim[RLIMIT_FSIZE].rlim_cur >> sb->s_blocksize)) {
+       *err = -EFBIG;
+       if (!create) {
                 brelse (bh);
-               *err = -EFBIG;
                 return NULL;
+       } else {
+               unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
+               if (limit < RLIM_INFINITY) {
+                       limit >>= sb->s_blocksize_bits;
+                       if (new_fragment >= limit) {
+                               brelse (bh);
+                               send_sig(SIGXFSZ, current, 0);
+                               return NULL;
+                       }
+               }
         }
         if (block && (tmp = SWAB32(((u32*)bh->b_data)[block-1]) + uspi->s_fpb))
                 goal = tmp + uspi->s_fpb;
@@ -334,12 +370,25 @@ repeat:
         if (!tmp) {
                 if (SWAB32(*p)) {
                         goto repeat;
-               }
-               else {
+               } else {
+                       brelse (bh);
                         return NULL;
                 }
         }               
-       result = getblk (bh->b_dev, tmp + blockoff, sb->s_blocksize);
+
+       /* The nullification of fragments done in ufs/balloc.c is
+        * something I don't have the stomach to move into here right
+        * now. -DaveM
+        */
+       if (metadata) {
+               result = getblk (bh->b_dev, tmp + blockoff, sb->s_blocksize);
+       } else {
+               *phys_block = tmp;
+               result = NULL;
+               *err = 0;
+               *created = 1;
+       }
+
         mark_buffer_dirty(bh, 1);
         if (IS_SYNC(inode)) {
                 ll_rw_block (WRITE, 1, &bh);
@@ -352,14 +401,15 @@ repeat:
         return result;
  }
  
-struct buffer_head * ufs_getfrag (struct inode * inode, unsigned fragment,
-       int create, int * err)
+int ufs_getfrag_block (struct inode * inode, long fragment,
+                      int create, int * err, int *created)
  {
         struct super_block * sb;
         struct ufs_sb_private_info * uspi;
-       struct buffer_head * bh;
+       struct buffer_head * bh, * tmp;
         unsigned f;
         unsigned swab;
+       int phys_block;
         
         sb = inode->i_sb;
         uspi = sb->u.ufs_sb.s_uspi;
@@ -367,19 +417,27 @@ struct buffer_head * ufs_getfrag (struct inode * inode, unsigned fragment,
         *err = -EIO;
  
         UFSD(("ENTER, ino %lu, fragment %u\n", inode->i_ino, fragment))
+       if (fragment < 0) {
+               ufs_warning (sb, "ufs_getblk", "block < 0");
+               return 0;
+       }
         if (fragment > ((UFS_NDADDR + uspi->s_apb + uspi->s_2apb + uspi->s_3apb) << uspi->s_fpbshift)) {
                 ufs_warning (sb, "ufs_getblk", "block > big");
-               return NULL;
+               return 0;
         }
  
         *err = -ENOSPC;
         f = fragment;
+       *created = 0;
           
         /*
          * Direct fragment
          */
-       if (fragment < UFS_NDIR_FRAGMENT)
-               return ufs_inode_getfrag (inode, fragment, fragment, create, 1, err);
+       if (fragment < UFS_NDIR_FRAGMENT) {
+               tmp = ufs_inode_getfrag (inode, fragment, fragment, create, 1,
+                                        err, 0, &phys_block, created);
+               goto out;
+       }
         /*
          * Indirect fragment
          */
@@ -387,10 +445,12 @@ struct buffer_head * ufs_getfrag (struct inode * inode, unsigned fragment,
         if (fragment < (1 << (uspi->s_apbshift + uspi->s_fpbshift))) {
                 bh = ufs_inode_getfrag (inode, 
                         UFS_IND_FRAGMENT + (fragment >> uspi->s_apbshift),
-                       f, create, uspi->s_fpb, err);
-               return ufs_block_getfrag (inode, bh, 
-                       fragment & uspi->s_apbmask,
-                       f, create, sb->s_blocksize, err);
+                       f, create, uspi->s_fpb, err, 1, NULL, NULL);
+               tmp = ufs_block_getfrag (inode, bh, 
+                                        fragment & uspi->s_apbmask,
+                                        f, create, sb->s_blocksize,
+                                        err, 0, &phys_block, created);
+               goto out;
         }
         /*
          * Dindirect fragment
@@ -398,14 +458,18 @@ struct buffer_head * ufs_getfrag (struct inode * inode, unsigned fragment,
         fragment -= 1 << (uspi->s_apbshift + uspi->s_fpbshift);
         if ( fragment < (1 << (uspi->s_2apbshift + uspi->s_fpbshift))) {
                 bh = ufs_inode_getfrag (inode,
-                       UFS_DIND_FRAGMENT + (fragment >> uspi->s_2apbshift), 
-                       f, create, uspi->s_fpb, err);
+                       UFS_DIND_FRAGMENT + (fragment >> uspi->s_2apbshift),
+                       f, create, uspi->s_fpb, err,
+                       1, NULL, NULL);
                 bh = ufs_block_getfrag (inode, bh,
                         (fragment >> uspi->s_apbshift) & uspi->s_apbmask, 
-                       f, create, sb->s_blocksize, err);
-               return ufs_block_getfrag (inode, bh, 
+                       f, create, sb->s_blocksize, err,
+                       1, NULL, NULL);
+               tmp = ufs_block_getfrag (inode, bh, 
                         fragment & uspi->s_apbmask,
-                       f, create, sb->s_blocksize, err);
+                       f, create, sb->s_blocksize, err,
+                       0, &phys_block, created);
+               goto out;
         }
         /*
          * Tindirect fragment
@@ -413,19 +477,42 @@ struct buffer_head * ufs_getfrag (struct inode * inode, unsigned fragment,
         fragment -= 1 << (uspi->s_2apbshift + uspi->s_fpbshift);
         bh = ufs_inode_getfrag (inode,
                 UFS_TIND_FRAGMENT + (fragment >> uspi->s_3apbshift), 
-               f, create, uspi->s_fpb, err);
+               f, create, uspi->s_fpb, err, 1, NULL, NULL);
         bh = ufs_block_getfrag (inode, bh,
                 (fragment >> uspi->s_2apbshift) & uspi->s_apbmask,
-               f, create, sb->s_blocksize, err);
+               f, create, sb->s_blocksize, err, 1, NULL, NULL);
         bh = ufs_block_getfrag (inode, bh,
                 (fragment >> uspi->s_apbshift) & uspi->s_apbmask, 
-               f, create, sb->s_blocksize, err);
-       return ufs_block_getfrag (inode, bh,
+               f, create, sb->s_blocksize, err, 1, NULL, NULL);
+       tmp = ufs_block_getfrag (inode, bh,
                 fragment & uspi->s_apbmask, 
-               f, create, sb->s_blocksize, err);
-}
+               f, create, sb->s_blocksize, err, 0, &phys_block, created);
  
+out:
+       if (!phys_block)
+               return 0;
+       if (*err)
+               return 0;
+       return phys_block;
+}
  
+struct buffer_head *ufs_getfrag(struct inode *inode, unsigned int fragment,
+                               int create, int *err)
+{
+       struct buffer_head *tmp = NULL;
+       int phys_block, created;
+       /* Get the block number from ufs_getfrag_block(); return a buffer head for it, or NULL if it is 0. */
+       phys_block = ufs_getfrag_block(inode, fragment, create, err, &created);
+       if (phys_block) {
+               tmp = getblk(inode->i_dev, phys_block, inode->i_sb->s_blocksize);
+               if (created) { /* flagged as newly created: zero the data, mark it uptodate and dirty */
+                       memset(tmp->b_data, 0, inode->i_sb->s_blocksize);
+                       mark_buffer_uptodate(tmp, 1);
+                       mark_buffer_dirty(tmp, 1);
+               }
+       }
+       return tmp;
+}
  
  struct buffer_head * ufs_bread (struct inode * inode, unsigned fragment,
         int create, int * err)
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c

index 3fec735a288da32729cd7d1172831412aa74a3ca..4649a42536f8247452ff8ac09cfb56c23b77c2c6 100644 (file)
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -62,6 +62,9 @@
  #define DIRECT_BLOCK howmany (inode->i_size, uspi->s_bsize)
  #define DIRECT_FRAGMENT howmany (inode->i_size, uspi->s_fsize)
  
+#define DATA_BUFFER_USED(bh) \
+       ((bh->b_count > 1) || buffer_locked(bh))
+
  static int ufs_trunc_direct (struct inode * inode)
  {
         struct super_block * sb;
@@ -114,7 +117,7 @@ static int ufs_trunc_direct (struct inode * inode)
         frag2 = ufs_fragnum (frag2);
         for (j = frag1; j < frag2; j++) {
                 bh = get_hash_table (sb->s_dev, tmp + j, uspi->s_fsize);
-               if ((bh && bh->b_count != 1) || tmp != SWAB32(*p)) {
+               if ((bh && DATA_BUFFER_USED(bh)) || tmp != SWAB32(*p)) {
                         retry = 1;
                         brelse (bh);
                         goto next1;
@@ -137,7 +140,7 @@ next1:
                         continue;
                 for (j = 0; j < uspi->s_fpb; j++) {
                         bh = get_hash_table (sb->s_dev, tmp + j, uspi->s_fsize);
-                       if ((bh && bh->b_count != 1) || tmp != SWAB32(*p)) {
+                       if ((bh && DATA_BUFFER_USED(bh)) || tmp != SWAB32(*p)) {
                                 retry = 1;
                                 brelse (bh);
                                 goto next2;
@@ -176,7 +179,7 @@ next2:
         frag4 = ufs_fragnum (frag4);
         for (j = 0; j < frag4; j++) {
                 bh = get_hash_table (sb->s_dev, tmp + j, uspi->s_fsize);
-               if ((bh && bh->b_count != 1) || tmp != SWAB32(*p)) {
+               if ((bh && DATA_BUFFER_USED(bh)) || tmp != SWAB32(*p)) {
                         retry = 1;
                         brelse (bh);
                         goto next1;
@@ -237,7 +240,7 @@ static int ufs_trunc_indirect (struct inode * inode, unsigned offset, u32 * p)
                         continue;
                 for (j = 0; j < uspi->s_fpb; j++) {
                         bh = get_hash_table (sb->s_dev, tmp + j, uspi->s_fsize);
-                       if ((bh && bh->b_count != 1) || tmp != SWAB32(*ind)) {
+                       if ((bh && DATA_BUFFER_USED(bh)) || tmp != SWAB32(*ind)) {
                                 retry = 1;
                                 brelse (bh);
                                 goto next;
diff --git a/include/linux/fs.h b/include/linux/fs.h

index 0beb57019d24d8e94a4754247ac56fd192acae8f..de4143c4820766dcf86ddded54cff38ba1609262 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -356,7 +356,6 @@ struct inode {
         unsigned long           i_version;
         unsigned long           i_nrpages;
         struct semaphore        i_sem;
-       struct semaphore        i_atomic_write;
         struct inode_operations *i_op;
         struct super_block      *i_sb;
         wait_queue_head_t       i_wait;
@@ -622,7 +621,7 @@ struct inode_operations {
         int (*smap) (struct inode *,int);
         int (*updatepage) (struct file *, struct page *, unsigned long, unsigned int);
         int (*revalidate) (struct dentry *);
-       int (*flushpage) (struct inode *, struct page *, int);
+       int (*flushpage) (struct inode *, struct page *, unsigned long);
  };
  
  struct super_operations {
@@ -750,15 +749,7 @@ extern struct file *inuse_filps;
  
  extern void set_writetime(struct buffer_head *, int);
  extern int try_to_free_buffers(struct page *);
-extern void __refile_buffer(struct buffer_head * buf);
-extern inline void refile_buffer(struct buffer_head * buf)
-{
-       /*
-        * Subtle, we do not want to refile not hashed buffers ...
-        */
-       if (buf->b_pprev)
-               __refile_buffer(buf);
-}
+extern void refile_buffer(struct buffer_head * buf);
  
  extern int buffermem;
  
@@ -881,8 +872,10 @@ extern int generic_readpage(struct file *, struct page *);
  extern int generic_file_mmap(struct file *, struct vm_area_struct *);
  extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *);
  extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *, writepage_t);
-extern int generic_block_flushpage(struct inode *, struct page *, int);
-extern long block_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf, fs_getblock_t fs_get_block);
+extern int generic_block_flushpage(struct inode *, struct page *, unsigned long);
+extern int block_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf, fs_getblock_t fs_get_block);
+extern int block_write_full_page (struct file *file, struct page *page, fs_getblock_t fs_get_block);
+
  
  extern struct super_block *get_super(kdev_t);
  extern void put_super(kdev_t);
diff --git a/include/linux/minix_fs.h b/include/linux/minix_fs.h

index 4682ee56e49c8ca62fcbea883650b779e8c1c7f1..0b41889bd0bab941dc74e88e93811a769fe4c850 100644 (file)
--- a/include/linux/minix_fs.h
+++ b/include/linux/minix_fs.h
@@ -110,6 +110,7 @@ extern unsigned long minix_count_free_blocks(struct super_block *sb);
  extern int minix_bmap(struct inode *,int);
  
  extern struct buffer_head * minix_getblk(struct inode *, int, int);
+extern int minix_getblk_block (struct inode *, long, int, int *, int *);
  extern struct buffer_head * minix_bread(struct inode *, int, int);
  
  extern void minix_truncate(struct inode *);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h

index 613eb68722bb8bf9a0a8a5550ad3d0f723867ff2..d91a0b6410f427e052a60a228bc54b96ab6f5cb8 100644 (file)
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -192,7 +192,8 @@ extern struct inode_operations nfs_file_inode_operations;
   */
  extern struct inode_operations nfs_dir_inode_operations;
  extern struct dentry_operations nfs_dentry_operations;
-extern void nfs_invalidate_dircache(struct inode *);
+extern void nfs_flush_dircache(struct inode *);
+extern void nfs_free_dircache(struct inode *);
  
  /*
   * linux/fs/nfs/symlink.c
diff --git a/include/linux/sysv_fs.h b/include/linux/sysv_fs.h

index 49d9d24f163d51f1a350bb4bd6f75334587283ea..d8c6eef5cb3a5766c2b5b4179c7fbf1baca5130b 100644 (file)
--- a/include/linux/sysv_fs.h
+++ b/include/linux/sysv_fs.h
@@ -387,6 +387,7 @@ extern unsigned long sysv_count_free_blocks(struct super_block *sb);
  extern int sysv_bmap(struct inode *,int);
  
  extern struct buffer_head * sysv_getblk(struct inode *, unsigned int, int);
+extern int sysv_getblk_block(struct inode *, long, int, int *, int *);
  extern struct buffer_head * sysv_file_bread(struct inode *, int, int);
  extern ssize_t sysv_file_read(struct file *, char *, size_t, loff_t *);
  
diff --git a/include/linux/ufs_fs.h b/include/linux/ufs_fs.h

index e36ceba9e9ea2ae60d4ee9e8e188bd7b98c3b421..2124c7e57b96ed0724d7b517d918a613b32f076a 100644 (file)
--- a/include/linux/ufs_fs.h
+++ b/include/linux/ufs_fs.h
@@ -537,6 +537,7 @@ extern int ufs_sync_inode (struct inode *);
  extern void ufs_write_inode (struct inode *);
  extern void ufs_delete_inode (struct inode *);
  extern struct buffer_head * ufs_getfrag (struct inode *, unsigned, int, int *);
+extern int ufs_getfrag_block (struct inode *, long, int, int *, int *);
  extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *);
  
  /* namei.c */
diff --git a/mm/filemap.c b/mm/filemap.c

index 2ad26debb47607c85e0ea4711e2bd1b1f581f644..b1ce3a46d7bf5abab1d6a4c18a191abc6f89b55d 100644 (file)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -137,8 +137,6 @@ repeat:
                                 page_cache_release(page);
                                 goto repeat;
                         }
-                       if (page_count(page) != 2)
-                               printk("hm, busy page truncated? (not necesserily a bug)\n");
                         spin_unlock(&pagecache_lock);
  
                         if (inode->i_op->flushpage)
@@ -160,9 +158,6 @@ repeat:
                         page->prev = NULL;
                         remove_page_from_hash_queue(page);
                         page->inode = NULL;
-
-                       if (page_count(page) != 2)
-                               printk("hm, busy page truncated? (not necesserily a bug)\n");
                         spin_unlock(&pagecache_lock);
  
                         UnlockPage(page);
@@ -189,6 +184,13 @@ repeat:
                 /* partial truncate, clear end of page */
                 if (offset < PAGE_CACHE_SIZE) {
                         unsigned long address;
+                       get_page(page);
+                       if (TryLockPage(page)) {
+                               spin_unlock(&pagecache_lock);
+                               wait_on_page(page);
+                               page_cache_release(page);
+                               goto repeat;
+                       }
                         /*
                          * It's worth dropping the write lock only at
                          * this point. We are holding the page lock
@@ -200,10 +202,15 @@ repeat:
                         address = page_address(page);
                         memset((void *) (offset + address), 0, PAGE_CACHE_SIZE - offset);
                         flush_page_to_ram(address);
+
+                       if (inode->i_op->flushpage)
+                               inode->i_op->flushpage(inode, page, offset);
                         /*
-                        * we have dropped the lock so we have to
+                        * we have dropped the spinlock so we have to
                          * restart.
                          */
+                       UnlockPage(page);
+                       page_cache_release(page);
                         goto repeat;
                 }
         }
@@ -217,25 +224,9 @@ repeat:
   */
  void remove_inode_page(struct page *page)
  {
-       struct inode *inode = page->inode;
-
         if (!PageLocked(page))
                 PAGE_BUG(page);
  
-       /*
-        * We might sleep here. Other processes might arrive and sleep on
-        * the lock, but nobody is allowed to 'cross' the lock and get a
-        * reference to the page. We then remove the page from the hash
-        * before unlocking it. This mechanizm ensures that 1) nobody gets
-        * a half-freed page 2) nobody creates the same pagecache content
-        * before we finish destroying this page. This is not a
-        * performance problem as pages here are candidates for getting
-        * freed, ie. it's supposed to be unlikely that the above situation
-        * happens.
-        */
-       if (inode->i_op->flushpage)
-               inode->i_op->flushpage(inode, page, 1);
-
         spin_lock(&pagecache_lock);
         remove_page_from_inode_queue(page);
         remove_page_from_hash_queue(page);
@@ -274,12 +265,27 @@ int shrink_mmap(int priority, int gfp_mask)
                 
                 referenced = test_and_clear_bit(PG_referenced, &page->flags);
  
-               if (PageLocked(page))
+               if ((gfp_mask & __GFP_DMA) && !PageDMA(page))
                         continue;
  
-               if ((gfp_mask & __GFP_DMA) && !PageDMA(page))
+               if (PageLocked(page))
                         continue;
  
+               /* Is it a buffer page? */
+               if (page->buffers) {
+                       if (buffer_under_min())
+                               continue;
+
+                       if (TryLockPage(page))
+                               continue;
+                       err = try_to_free_buffers(page);
+                       UnlockPage(page);
+
+                       if (!err)
+                               continue;
+                       goto out;
+               }
+
                 /* We can't free pages unless there's just one user */
                 if (page_count(page) != 1)
                         continue;
@@ -309,13 +315,14 @@ int shrink_mmap(int priority, int gfp_mask)
                                 goto unlock_continue;
                         if (TryLockPage(page))
                                 goto unlock_continue;
-                       if (page_count(page) != 1) {
-                               UnlockPage(page);
-                               goto unlock_continue;
+
+                       if (page_count(page) == 1) {
+                               remove_page_from_inode_queue(page);
+                               remove_page_from_hash_queue(page);
+                               page->inode = NULL;
                         }
                         spin_unlock(&pagecache_lock);
  
-                       remove_inode_page(page);
                         UnlockPage(page);
                         page_cache_release(page);
                         err = 1;
@@ -325,17 +332,6 @@ unlock_continue:
                         continue;
                 }
                 spin_unlock(&pagecache_lock);
-
-               /* Is it a buffer page? */
-               if (page->buffers) {
-                       if (buffer_under_min())
-                               continue;
-                       if (!try_to_free_buffers(page))
-                               continue;
-                       err = 1;
-                       goto out;
-               }
-
         } while (count > 0);
         err = 0;
  out:
@@ -1086,17 +1082,14 @@ static int file_send_actor(read_descriptor_t * desc, const char *area, unsigned
         ssize_t written;
         unsigned long count = desc->count;
         struct file *file = (struct file *) desc->buf;
-       struct inode *inode = file->f_dentry->d_inode;
         mm_segment_t old_fs;
  
         if (size > count)
                 size = count;
-       down(&inode->i_sem);
         old_fs = get_fs();
         set_fs(KERNEL_DS);
         written = file->f_op->write(file, area, size, &file->f_pos);
         set_fs(old_fs);
-       up(&inode->i_sem);
         if (written < 0) {
                 desc->error = written;
                 written = 0;
@@ -1362,7 +1355,6 @@ static inline int do_write_page(struct inode * inode, struct file * file,
         int retval;
         unsigned long size;
         loff_t loff = offset;
-       mm_segment_t old_fs;
         int (*writepage) (struct file *, struct page *);
         struct page * page;
  
@@ -1376,8 +1368,6 @@ static inline int do_write_page(struct inode * inode, struct file * file,
                         return -EIO;
         }
         size -= offset;
-       old_fs = get_fs();
-       set_fs(KERNEL_DS);
         retval = -EIO;
         writepage = inode->i_op->writepage;
         page = mem_map + MAP_NR(page_addr);
@@ -1386,11 +1376,13 @@ static inline int do_write_page(struct inode * inode, struct file * file,
         if (writepage) {
                 retval = writepage(file, page);
         } else {
+               mm_segment_t old_fs = get_fs();
+               set_fs(KERNEL_DS);
                 if (size == file->f_op->write(file, page_addr, size, &loff))
-               retval = 0;
+                       retval = 0;
+               set_fs(old_fs);
         }
         UnlockPage(page);
-       set_fs(old_fs);
         return retval;
  }
  
@@ -1426,9 +1418,7 @@ static int filemap_write_page(struct vm_area_struct * vma,
                 return 0;
         }
         
-       down(&inode->i_sem);
         result = do_write_page(inode, file, (const char *) page, offset);
-       up(&inode->i_sem);
         fput(file);
         return result;
  }
@@ -1642,10 +1632,7 @@ static int msync_interval(struct vm_area_struct * vma,
                         struct file * file = vma->vm_file;
                         if (file) {
                                 struct dentry * dentry = file->f_dentry;
-                               struct inode * inode = dentry->d_inode;
-                               down(&inode->i_sem);
                                 error = file_fsync(file, dentry);
-                               up(&inode->i_sem);
                         }
                 }
                 return error;
@@ -1972,10 +1959,8 @@ int kpiod(void * unused)
                         dentry = p->file->f_dentry;
                         inode = dentry->d_inode;
                         
-                       down(&inode->i_sem);
                         do_write_page(inode, p->file,
                                       (const char *) p->page, p->offset);
-                       up(&inode->i_sem);
                         fput(p->file);
                         page_cache_free(p->page);
                         kmem_cache_free(pio_request_cache, p);
author	Linus Torvalds <torvalds@linuxfoundation.org>
	Fri, 23 Nov 2007 20:25:28 +0000 (15:25 -0500)
committer	Linus Torvalds <torvalds@linuxfoundation.org>
	Fri, 23 Nov 2007 20:25:28 +0000 (15:25 -0500)
drivers/char/tty_io.c		patch \| blob \| history
drivers/usb/uhci.c		patch \| blob \| history
fs/buffer.c		patch \| blob \| history
fs/ext2/file.c		patch \| blob \| history
fs/inode.c		patch \| blob \| history
fs/minix/bitmap.c		patch \| blob \| history
fs/minix/file.c		patch \| blob \| history
fs/minix/inode.c		patch \| blob \| history
fs/minix/truncate.c		patch \| blob \| history
fs/nfs/dir.c		patch \| blob \| history
fs/nfs/inode.c		patch \| blob \| history
fs/pipe.c		patch \| blob \| history
fs/read_write.c		patch \| blob \| history
fs/sysv/file.c		patch \| blob \| history
fs/sysv/inode.c		patch \| blob \| history
fs/sysv/truncate.c		patch \| blob \| history
fs/ufs/file.c		patch \| blob \| history
fs/ufs/inode.c		patch \| blob \| history
fs/ufs/truncate.c		patch \| blob \| history
include/linux/fs.h		patch \| blob \| history
include/linux/minix_fs.h		patch \| blob \| history
include/linux/nfs_fs.h		patch \| blob \| history
include/linux/sysv_fs.h		patch \| blob \| history
include/linux/ufs_fs.h		patch \| blob \| history
mm/filemap.c		patch \| blob \| history