git.neil.brown.name Git - history.git/commitdiff
[PATCH] no-buffer-head ext2 option
authorAndrew Morton <akpm@digeo.com>
Fri, 22 Nov 2002 03:32:45 +0000 (19:32 -0800)
committerLinus Torvalds <torvalds@penguin.transmeta.com>
Fri, 22 Nov 2002 03:32:45 +0000 (19:32 -0800)
Implements a new set of block address_space_operations which will never
attach buffer_heads to file pagecache.  These can be turned on for ext2
with the `nobh' mount option.

During write-intensive testing on a 7G machine, total buffer_head
storage remained below 0.3 megabytes.  And those buffer_heads are
against ZONE_NORMAL pagecache and will be reclaimed by ZONE_NORMAL
memory pressure.

This work is, of course, a special case for the huge highmem machines.
Possibly it obsoletes the buffer_heads_over_limit stuff (which doesn't
work terribly well), but that code is simple, and will provide relief
for other filesystems.

It should be noted that the nobh_prepare_write() function and the
PageMappedToDisk() infrastructure are what is needed to solve the
problem of user data corruption when the filesystem which backs a
sparse MAP_SHARED mapping runs out of space.  We can use this code in
filemap_nopage() to ensure that all mapped pages have space allocated
on-disk.  Deliver SIGBUS on ENOSPC.

This will require a new address_space op, I expect.

fs/buffer.c
fs/ext2/ext2.h
fs/ext2/inode.c
fs/ext2/namei.c
fs/ext2/super.c
fs/mpage.c
include/linux/buffer_head.h
include/linux/ext2_fs.h
include/linux/page-flags.h
mm/page_alloc.c
mm/truncate.c

index a0c7f850482ff0f4274bcf689cfa6df10a97f550..aacd45b8b2606dd28cf210498476b5034780116e 100644 (file)
@@ -1964,6 +1964,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
        struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
        unsigned int blocksize, blocks;
        int nr, i;
+       int fully_mapped = 1;
 
        if (!PageLocked(page))
                PAGE_BUG(page);
@@ -1986,6 +1987,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
                        continue;
 
                if (!buffer_mapped(bh)) {
+                       fully_mapped = 0;
                        if (iblock < lblock) {
                                if (get_block(inode, iblock, bh, 0))
                                        SetPageError(page);
@@ -2008,6 +2010,9 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
                arr[nr++] = bh;
        } while (i++, iblock++, (bh = bh->b_this_page) != head);
 
+       if (fully_mapped)
+               SetPageMappedToDisk(page);
+
        if (!nr) {
                /*
                 * All buffers are uptodate - we can set the page uptodate
@@ -2204,6 +2209,198 @@ int generic_commit_write(struct file *file, struct page *page,
        return 0;
 }
 
+/*
+ * On entry, the page is fully not uptodate.
+ * On exit the page is fully uptodate in the areas outside (from,to)
+ */
+int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
+                       get_block_t *get_block)
+{
+       struct inode *inode = page->mapping->host;
+       const unsigned blkbits = inode->i_blkbits;
+       const unsigned blocksize = 1 << blkbits;
+       struct buffer_head map_bh;
+       struct buffer_head *read_bh[MAX_BUF_PER_PAGE];
+       unsigned block_in_page;
+       unsigned block_start;
+       sector_t block_in_file;
+       char *kaddr;
+       int nr_reads = 0;
+       int i;
+       int ret = 0;
+       int is_mapped_to_disk = 1;
+       int dirtied_it = 0;
+
+       if (PageMappedToDisk(page))
+               return 0;
+
+       block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
+       map_bh.b_page = page;
+
+       /*
+        * We loop across all blocks in the page, whether or not they are
+        * part of the affected region.  This is so we can discover if the
+        * page is fully mapped-to-disk.
+        */
+       for (block_start = 0, block_in_page = 0;
+                 block_start < PAGE_CACHE_SIZE;
+                 block_in_page++, block_start += blocksize) {
+               unsigned block_end = block_start + blocksize;
+               int create;
+
+               map_bh.b_state = 0;
+               create = 1;
+               if (block_start >= to)
+                       create = 0;
+               ret = get_block(inode, block_in_file + block_in_page,
+                                       &map_bh, create);
+               if (ret)
+                       goto failed;
+               if (!buffer_mapped(&map_bh))
+                       is_mapped_to_disk = 0;
+               if (buffer_new(&map_bh))
+                       unmap_underlying_metadata(map_bh.b_bdev,
+                                                       map_bh.b_blocknr);
+               if (PageUptodate(page))
+                       continue;
+               if (buffer_new(&map_bh) || !buffer_mapped(&map_bh)) {
+                       kaddr = kmap_atomic(page, KM_USER0);
+                       if (block_start < from) {
+                               memset(kaddr+block_start, 0, from-block_start);
+                               dirtied_it = 1;
+                       }
+                       if (block_end > to) {
+                               memset(kaddr + to, 0, block_end - to);
+                               dirtied_it = 1;
+                       }
+                       flush_dcache_page(page);
+                       kunmap_atomic(kaddr, KM_USER0);
+                       continue;
+               }
+               if (buffer_uptodate(&map_bh))
+                       continue;       /* reiserfs does this */
+               if (block_start < from || block_end > to) {
+                       struct buffer_head *bh = alloc_buffer_head();
+
+                       if (!bh) {
+                               ret = -ENOMEM;
+                               goto failed;
+                       }
+                       bh->b_state = map_bh.b_state;
+                       atomic_set(&bh->b_count, 0);
+                       bh->b_this_page = 0;
+                       bh->b_page = page;
+                       bh->b_blocknr = map_bh.b_blocknr;
+                       bh->b_size = blocksize;
+                       bh->b_data = (char *)block_start;
+                       bh->b_bdev = map_bh.b_bdev;
+                       bh->b_private = NULL;
+                       read_bh[nr_reads++] = bh;
+               }
+       }
+
+       if (nr_reads) {
+               ll_rw_block(READ, nr_reads, read_bh);
+               for (i = 0; i < nr_reads; i++) {
+                       wait_on_buffer(read_bh[i]);
+                       if (!buffer_uptodate(read_bh[i]))
+                               ret = -EIO;
+                       free_buffer_head(read_bh[i]);
+                       read_bh[i] = NULL;
+               }
+               if (ret)
+                       goto failed;
+       }
+
+       if (is_mapped_to_disk)
+               SetPageMappedToDisk(page);
+       SetPageUptodate(page);
+
+       /*
+        * Setting the page dirty here isn't necessary for the prepare_write
+        * function - commit_write will do that.  But if/when this function is
+        * used within the pagefault handler to ensure that all mmapped pages
+        * have backing space in the filesystem, we will need to dirty the page
+        * if its contents were altered.
+        */
+       if (dirtied_it)
+               set_page_dirty(page);
+
+       return 0;
+
+failed:
+       for (i = 0; i < nr_reads; i++) {
+               if (read_bh[i])
+                       free_buffer_head(read_bh[i]);
+       }
+
+       /*
+        * Error recovery is pretty slack.  Clear the page and mark it dirty
+        * so we'll later zero out any blocks which _were_ allocated.
+        */
+       kaddr = kmap_atomic(page, KM_USER0);
+       memset(kaddr, 0, PAGE_CACHE_SIZE);
+       kunmap_atomic(kaddr, KM_USER0);
+       SetPageUptodate(page);
+       set_page_dirty(page);
+       return ret;
+}
+EXPORT_SYMBOL(nobh_prepare_write);
+
+int nobh_commit_write(struct file *file, struct page *page,
+               unsigned from, unsigned to)
+{
+       struct inode *inode = page->mapping->host;
+       loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+
+       set_page_dirty(page);
+       if (pos > inode->i_size) {
+               inode->i_size = pos;
+               mark_inode_dirty(inode);
+       }
+       return 0;
+}
+EXPORT_SYMBOL(nobh_commit_write);
+
+/*
+ * This function assumes that ->prepare_write() uses nobh_prepare_write().
+ */
+int nobh_truncate_page(struct address_space *mapping, loff_t from)
+{
+       struct inode *inode = mapping->host;
+       unsigned blocksize = 1 << inode->i_blkbits;
+       pgoff_t index = from >> PAGE_CACHE_SHIFT;
+       unsigned offset = from & (PAGE_CACHE_SIZE-1);
+       unsigned to;
+       struct page *page;
+       struct address_space_operations *a_ops = mapping->a_ops;
+       char *kaddr;
+       int ret = 0;
+
+       if ((offset & (blocksize - 1)) == 0)
+               goto out;
+
+       ret = -ENOMEM;
+       page = grab_cache_page(mapping, index);
+       if (!page)
+               goto out;
+
+       to = (offset + blocksize) & ~(blocksize - 1);
+       ret = a_ops->prepare_write(NULL, page, offset, to);
+       if (ret == 0) {
+               kaddr = kmap_atomic(page, KM_USER0);
+               memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
+               flush_dcache_page(page);
+               kunmap_atomic(kaddr, KM_USER0);
+               set_page_dirty(page);
+       }
+       unlock_page(page);
+       page_cache_release(page);
+out:
+       return ret;
+}
+EXPORT_SYMBOL(nobh_truncate_page);
+
 int block_truncate_page(struct address_space *mapping,
                        loff_t from, get_block_t *get_block)
 {
index e5de47ed3407d2ced445c1fc89600cd868c4ceb2..59b5921837cdedb76b81a36204866aa11b4c3117 100644 (file)
@@ -120,6 +120,7 @@ extern struct file_operations ext2_file_operations;
 
 /* inode.c */
 extern struct address_space_operations ext2_aops;
+extern struct address_space_operations ext2_nobh_aops;
 
 /* namei.c */
 extern struct inode_operations ext2_dir_inode_operations;
index 7fe83a0443869569f639e25cb216bb7af5b83cfc..29cccde53b74ef4a8bc033bc466f7717d121f242 100644 (file)
@@ -612,6 +612,13 @@ ext2_prepare_write(struct file *file, struct page *page,
        return block_prepare_write(page,from,to,ext2_get_block);
 }
 
+static int
+ext2_nobh_prepare_write(struct file *file, struct page *page,
+                       unsigned from, unsigned to)
+{
+       return nobh_prepare_write(page,from,to,ext2_get_block);
+}
+
 static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
 {
        return generic_block_bmap(mapping,block,ext2_get_block);
@@ -657,6 +664,18 @@ struct address_space_operations ext2_aops = {
        .writepages             = ext2_writepages,
 };
 
+struct address_space_operations ext2_nobh_aops = {
+       .readpage               = ext2_readpage,
+       .readpages              = ext2_readpages,
+       .writepage              = ext2_writepage,
+       .sync_page              = block_sync_page,
+       .prepare_write          = ext2_nobh_prepare_write,
+       .commit_write           = nobh_commit_write,
+       .bmap                   = ext2_bmap,
+       .direct_IO              = ext2_direct_IO,
+       .writepages             = ext2_writepages,
+};
+
 /*
  * Probably it should be a library function... search for first non-zero word
  * or memcmp with zero_page, whatever is better for particular architecture.
@@ -864,7 +883,11 @@ void ext2_truncate (struct inode * inode)
        iblock = (inode->i_size + blocksize-1)
                                        >> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
 
-       block_truncate_page(inode->i_mapping, inode->i_size, ext2_get_block);
+       if (test_opt(inode->i_sb, NOBH))
+               nobh_truncate_page(inode->i_mapping, inode->i_size);
+       else
+               block_truncate_page(inode->i_mapping,
+                               inode->i_size, ext2_get_block);
 
        n = ext2_block_to_path(inode, iblock, offsets, NULL);
        if (n == 0)
@@ -1044,17 +1067,26 @@ void ext2_read_inode (struct inode * inode)
        if (S_ISREG(inode->i_mode)) {
                inode->i_op = &ext2_file_inode_operations;
                inode->i_fop = &ext2_file_operations;
-               inode->i_mapping->a_ops = &ext2_aops;
+               if (test_opt(inode->i_sb, NOBH))
+                       inode->i_mapping->a_ops = &ext2_nobh_aops;
+               else
+                       inode->i_mapping->a_ops = &ext2_aops;
        } else if (S_ISDIR(inode->i_mode)) {
                inode->i_op = &ext2_dir_inode_operations;
                inode->i_fop = &ext2_dir_operations;
-               inode->i_mapping->a_ops = &ext2_aops;
+               if (test_opt(inode->i_sb, NOBH))
+                       inode->i_mapping->a_ops = &ext2_nobh_aops;
+               else
+                       inode->i_mapping->a_ops = &ext2_aops;
        } else if (S_ISLNK(inode->i_mode)) {
                if (ext2_inode_is_fast_symlink(inode))
                        inode->i_op = &ext2_fast_symlink_inode_operations;
                else {
                        inode->i_op = &ext2_symlink_inode_operations;
-                       inode->i_mapping->a_ops = &ext2_aops;
+                       if (test_opt(inode->i_sb, NOBH))
+                               inode->i_mapping->a_ops = &ext2_nobh_aops;
+                       else
+                               inode->i_mapping->a_ops = &ext2_aops;
                }
        } else {
                inode->i_op = &ext2_special_inode_operations;
index 9f0788e3f6ef7e06e5b0f78544500a7dac2a14fb..04489df5a2e5a8a7dc5d0362930e69759fda33ff 100644 (file)
@@ -127,7 +127,10 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode)
        if (!IS_ERR(inode)) {
                inode->i_op = &ext2_file_inode_operations;
                inode->i_fop = &ext2_file_operations;
-               inode->i_mapping->a_ops = &ext2_aops;
+               if (test_opt(inode->i_sb, NOBH))
+                       inode->i_mapping->a_ops = &ext2_nobh_aops;
+               else
+                       inode->i_mapping->a_ops = &ext2_aops;
                mark_inode_dirty(inode);
                err = ext2_add_nondir(dentry, inode);
        }
@@ -168,7 +171,10 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry,
        if (l > sizeof (EXT2_I(inode)->i_data)) {
                /* slow symlink */
                inode->i_op = &ext2_symlink_inode_operations;
-               inode->i_mapping->a_ops = &ext2_aops;
+               if (test_opt(inode->i_sb, NOBH))
+                       inode->i_mapping->a_ops = &ext2_nobh_aops;
+               else
+                       inode->i_mapping->a_ops = &ext2_aops;
                err = page_symlink(inode, symname, l);
                if (err)
                        goto out_fail;
@@ -222,7 +228,10 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 
        inode->i_op = &ext2_dir_inode_operations;
        inode->i_fop = &ext2_dir_operations;
-       inode->i_mapping->a_ops = &ext2_aops;
+       if (test_opt(inode->i_sb, NOBH))
+               inode->i_mapping->a_ops = &ext2_nobh_aops;
+       else
+               inode->i_mapping->a_ops = &ext2_aops;
 
        ext2_inc_count(inode);
 
index a0acf56bd78101b314d90906d833cb3b9cc7c284..23116c16e3ce9ddfda2001809f6914bcbb95f3d1 100644 (file)
@@ -391,6 +391,8 @@ static int parse_options (char * options,
                        set_opt (sbi->s_mount_opt, OLDALLOC);
                else if (!strcmp (this_char, "orlov"))
                        clear_opt (sbi->s_mount_opt, OLDALLOC);
+               else if (!strcmp (this_char, "nobh"))
+                       set_opt(sbi->s_mount_opt, NOBH);
                /* Silently ignore the quota options */
                else if (!strcmp (this_char, "grpquota")
                         || !strcmp (this_char, "noquota")
index 05622a8e515a8aed5f4b53b68035bde9827da6c8..8307f43f18b69691c51b89fd4fb0f902738dad14 100644 (file)
@@ -178,6 +178,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
        struct block_device *bdev = NULL;
        struct buffer_head bh;
        int length;
+       int fully_mapped = 1;
 
        if (page_has_buffers(page))
                goto confused;
@@ -194,6 +195,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
                }
 
                if (!buffer_mapped(&bh)) {
+                       fully_mapped = 0;
                        if (first_hole == blocks_per_page)
                                first_hole = page_block;
                        continue;
@@ -220,6 +222,8 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
                        unlock_page(page);
                        goto out;
                }
+       } else if (fully_mapped) {
+               SetPageMappedToDisk(page);
        }
 
        /*
index 8587dd6f714648b69933629b5668c58b2585447a..4e7a9bbf99ddf6cf9facd28ec41f4fca13769a7d 100644 (file)
@@ -190,6 +190,9 @@ sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
 int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
 int block_truncate_page(struct address_space *, loff_t, get_block_t *);
 int file_fsync(struct file *, struct dentry *, int);
+int nobh_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
+int nobh_commit_write(struct file *, struct page *, unsigned, unsigned);
+int nobh_truncate_page(struct address_space *, loff_t);
 
 #define OSYNC_METADATA (1<<0)
 #define OSYNC_DATA     (1<<1)
index 0d008cfb99a855ee488b44b637d132acd001a93f..d701ba88c68821728c302090cee6af85ea6679a7 100644 (file)
@@ -308,6 +308,7 @@ struct ext2_inode {
 #define EXT2_MOUNT_ERRORS_RO           0x0020  /* Remount fs ro on errors */
 #define EXT2_MOUNT_ERRORS_PANIC                0x0040  /* Panic on errors */
 #define EXT2_MOUNT_MINIX_DF            0x0080  /* Mimics the Minix statfs */
+#define EXT2_MOUNT_NOBH                        0x0100  /* No buffer_heads */
 #define EXT2_MOUNT_NO_UID32            0x0200  /* Disable 32-bit UIDs */
 #define EXT2_MOUNT_XATTR_USER          0x4000  /* Extended user attributes */
 #define EXT2_MOUNT_POSIX_ACL           0x8000  /* POSIX Access Control Lists */
index 44480d80952fab344e0b271c97d27b811571f9d7..f02449871c077d7e3cbafc384dc1a46f76d9a5ef 100644 (file)
@@ -70,6 +70,7 @@
 #define PG_chainlock           15      /* lock bit for ->pte_chain */
 
 #define PG_direct              16      /* ->pte_chain points directly at pte */
+#define PG_mappedtodisk                17      /* Has blocks allocated on-disk */
 
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
@@ -233,6 +234,10 @@ extern void get_full_page_state(struct page_state *ret);
 #define ClearPageDirect(page)          clear_bit(PG_direct, &(page)->flags)
 #define TestClearPageDirect(page)      test_and_clear_bit(PG_direct, &(page)->flags)
 
+#define PageMappedToDisk(page) test_bit(PG_mappedtodisk, &(page)->flags)
+#define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags)
+#define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags)
+
 /*
  * The PageSwapCache predicate doesn't use a PG_flag at this time,
  * but it may again do so one day.
index d8921e02318ae098620896912d3897ded226f556..a810142b49c0dc2b7a4befe99b237e46579fc75d 100644 (file)
@@ -265,7 +265,7 @@ static void prep_new_page(struct page *page, int order)
 
        page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
                        1 << PG_referenced | 1 << PG_arch_1 |
-                       1 << PG_checked);
+                       1 << PG_checked | 1 << PG_mappedtodisk);
        set_page_refs(page, order);
 }
 
index 884b4e3930c2869710e7e40af3ff051ccdf6cf41..25843367c31ee38e958756dbe1760d6db40d5b01 100644 (file)
@@ -52,6 +52,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
 
        clear_page_dirty(page);
        ClearPageUptodate(page);
+       ClearPageMappedToDisk(page);
        remove_from_page_cache(page);
        page_cache_release(page);       /* pagecache ref */
 }