struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
unsigned int blocksize, blocks;
int nr, i;
+ int fully_mapped = 1;
if (!PageLocked(page))
PAGE_BUG(page);
continue;
if (!buffer_mapped(bh)) {
+ fully_mapped = 0;
if (iblock < lblock) {
if (get_block(inode, iblock, bh, 0))
SetPageError(page);
arr[nr++] = bh;
} while (i++, iblock++, (bh = bh->b_this_page) != head);
+ if (fully_mapped)
+ SetPageMappedToDisk(page);
+
if (!nr) {
/*
* All buffers are uptodate - we can set the page uptodate
return 0;
}
+/*
+ * On entry, the page is fully not uptodate.
+ * On exit the page is fully uptodate in the areas outside (from,to)
+ *
+ * "nobh" variant of block_prepare_write(): the page is brought uptodate
+ * without leaving buffer_heads attached to it.  Short-lived buffer_heads
+ * are allocated only for blocks which must be read from disk, and are
+ * freed again before returning.
+ */
+int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
+			get_block_t *get_block)
+{
+	struct inode *inode = page->mapping->host;
+	const unsigned blkbits = inode->i_blkbits;
+	const unsigned blocksize = 1 << blkbits;
+	struct buffer_head map_bh;
+	struct buffer_head *read_bh[MAX_BUF_PER_PAGE];
+	unsigned block_in_page;
+	unsigned block_start;
+	sector_t block_in_file;
+	char *kaddr;
+	int nr_reads = 0;
+	int i;
+	int ret = 0;
+	int is_mapped_to_disk = 1;
+	int dirtied_it = 0;
+
+	/* A previous pass saw every block mapped: nothing more to do */
+	if (PageMappedToDisk(page))
+		return 0;
+
+	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
+	map_bh.b_page = page;
+
+	/*
+	 * We loop across all blocks in the page, whether or not they are
+	 * part of the affected region.  This is so we can discover if the
+	 * page is fully mapped-to-disk.
+	 */
+	for (block_start = 0, block_in_page = 0;
+		  block_start < PAGE_CACHE_SIZE;
+		block_in_page++, block_start += blocksize) {
+		unsigned block_end = block_start + blocksize;
+		int create;
+
+		map_bh.b_state = 0;
+		/* Only allocate new blocks inside the region being written */
+		create = 1;
+		if (block_start >= to)
+			create = 0;
+		ret = get_block(inode, block_in_file + block_in_page,
+					&map_bh, create);
+		if (ret)
+			goto failed;
+		if (!buffer_mapped(&map_bh))
+			is_mapped_to_disk = 0;
+		if (buffer_new(&map_bh))
+			unmap_underlying_metadata(map_bh.b_bdev,
+					map_bh.b_blocknr);
+		if (PageUptodate(page))
+			continue;
+		if (buffer_new(&map_bh) || !buffer_mapped(&map_bh)) {
+			/*
+			 * Freshly allocated block, or a hole: zero out the
+			 * parts of the block which lie outside (from, to).
+			 * The caller will fill the rest.
+			 */
+			kaddr = kmap_atomic(page, KM_USER0);
+			if (block_start < from) {
+				memset(kaddr+block_start, 0, from-block_start);
+				dirtied_it = 1;
+			}
+			if (block_end > to) {
+				memset(kaddr + to, 0, block_end - to);
+				dirtied_it = 1;
+			}
+			flush_dcache_page(page);
+			kunmap_atomic(kaddr, KM_USER0);
+			continue;
+		}
+		if (buffer_uptodate(&map_bh))
+			continue;	/* reiserfs does this */
+		if (block_start < from || block_end > to) {
+			/*
+			 * The block straddles the write region and must be
+			 * read in: queue a temporary buffer_head for it.
+			 */
+			struct buffer_head *bh = alloc_buffer_head();
+
+			if (!bh) {
+				ret = -ENOMEM;
+				goto failed;
+			}
+			bh->b_state = map_bh.b_state;
+			atomic_set(&bh->b_count, 0);
+			bh->b_this_page = NULL;
+			bh->b_page = page;
+			bh->b_blocknr = map_bh.b_blocknr;
+			bh->b_size = blocksize;
+			bh->b_data = (char *)block_start;
+			bh->b_bdev = map_bh.b_bdev;
+			bh->b_private = NULL;
+			read_bh[nr_reads++] = bh;
+		}
+	}
+
+	if (nr_reads) {
+		/* Read the straddling blocks, then reap the buffer_heads */
+		ll_rw_block(READ, nr_reads, read_bh);
+		for (i = 0; i < nr_reads; i++) {
+			wait_on_buffer(read_bh[i]);
+			if (!buffer_uptodate(read_bh[i]))
+				ret = -EIO;
+			free_buffer_head(read_bh[i]);
+			read_bh[i] = NULL;
+		}
+		if (ret)
+			goto failed;
+	}
+
+	if (is_mapped_to_disk)
+		SetPageMappedToDisk(page);
+	SetPageUptodate(page);
+
+	/*
+	 * Setting the page dirty here isn't necessary for the prepare_write
+	 * function - commit_write will do that.  But if/when this function is
+	 * used within the pagefault handler to ensure that all mmapped pages
+	 * have backing space in the filesystem, we will need to dirty the page
+	 * if its contents were altered.
+	 */
+	if (dirtied_it)
+		set_page_dirty(page);
+
+	return 0;
+
+failed:
+	/* Free any queued buffer_heads (entries already reaped are NULL) */
+	for (i = 0; i < nr_reads; i++) {
+		if (read_bh[i])
+			free_buffer_head(read_bh[i]);
+	}
+
+	/*
+	 * Error recovery is pretty slack.  Clear the page and mark it dirty
+	 * so we'll later zero out any blocks which _were_ allocated.
+	 */
+	kaddr = kmap_atomic(page, KM_USER0);
+	memset(kaddr, 0, PAGE_CACHE_SIZE);
+	kunmap_atomic(kaddr, KM_USER0);
+	SetPageUptodate(page);
+	set_page_dirty(page);
+	return ret;
+}
+EXPORT_SYMBOL(nobh_prepare_write);
+
+/*
+ * "nobh" variant of generic_commit_write(): dirty the page and extend
+ * i_size if the write went past the current end of file.  The 'file' and
+ * 'from' arguments are unused; they exist to match ->commit_write().
+ */
+int nobh_commit_write(struct file *file, struct page *page,
+			unsigned from, unsigned to)
+{
+	struct inode *inode = page->mapping->host;
+	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+
+	/* Whole-page dirtying; there are no per-block buffer_heads to dirty */
+	set_page_dirty(page);
+	if (pos > inode->i_size) {
+		inode->i_size = pos;
+		mark_inode_dirty(inode);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(nobh_commit_write);
+
+/*
+ * This function assumes that ->prepare_write() uses nobh_prepare_write().
+ *
+ * Zero out the tail of the partial block (if any) which contains the new
+ * end-of-file at offset 'from', so stale data is not exposed if the file
+ * is later extended.
+ */
+int nobh_truncate_page(struct address_space *mapping, loff_t from)
+{
+	struct inode *inode = mapping->host;
+	unsigned blocksize = 1 << inode->i_blkbits;
+	pgoff_t index = from >> PAGE_CACHE_SHIFT;
+	unsigned offset = from & (PAGE_CACHE_SIZE-1);
+	unsigned to;
+	struct page *page;
+	struct address_space_operations *a_ops = mapping->a_ops;
+	char *kaddr;
+	int ret = 0;
+
+	/* New size is block-aligned: no partial block to zero */
+	if ((offset & (blocksize - 1)) == 0)
+		goto out;
+
+	ret = -ENOMEM;
+	page = grab_cache_page(mapping, index);
+	if (!page)
+		goto out;
+
+	/* Round up to the end of the block containing the new EOF */
+	to = (offset + blocksize) & ~(blocksize - 1);
+	ret = a_ops->prepare_write(NULL, page, offset, to);
+	if (ret == 0) {
+		/* Zero from the new EOF to the end of the page */
+		kaddr = kmap_atomic(page, KM_USER0);
+		memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
+		flush_dcache_page(page);
+		kunmap_atomic(kaddr, KM_USER0);
+		set_page_dirty(page);
+	}
+	unlock_page(page);
+	page_cache_release(page);
+out:
+	return ret;
+}
+EXPORT_SYMBOL(nobh_truncate_page);
+
int block_truncate_page(struct address_space *mapping,
loff_t from, get_block_t *get_block)
{
/* inode.c */
extern struct address_space_operations ext2_aops;
+extern struct address_space_operations ext2_nobh_aops;
/* namei.c */
extern struct inode_operations ext2_dir_inode_operations;
return block_prepare_write(page,from,to,ext2_get_block);
}
+/* ->prepare_write() for "nobh" mounts: like ext2_prepare_write() but
+ * delegates to nobh_prepare_write(), which attaches no buffer_heads. */
+static int
+ext2_nobh_prepare_write(struct file *file, struct page *page,
+			unsigned from, unsigned to)
+{
+	return nobh_prepare_write(page,from,to,ext2_get_block);
+}
+
static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
{
return generic_block_bmap(mapping,block,ext2_get_block);
.writepages = ext2_writepages,
};
+/*
+ * Address space operations installed on file, directory and slow-symlink
+ * mappings when the "nobh" mount option is set: prepare_write/commit_write
+ * use the nobh helpers, which avoid attaching buffer_heads to the page.
+ */
+struct address_space_operations ext2_nobh_aops = {
+	.readpage = ext2_readpage,
+	.readpages = ext2_readpages,
+	.writepage = ext2_writepage,
+	.sync_page = block_sync_page,
+	.prepare_write = ext2_nobh_prepare_write,
+	.commit_write = nobh_commit_write,
+	.bmap = ext2_bmap,
+	.direct_IO = ext2_direct_IO,
+	.writepages = ext2_writepages,
+};
+
/*
* Probably it should be a library function... search for first non-zero word
* or memcmp with zero_page, whatever is better for particular architecture.
iblock = (inode->i_size + blocksize-1)
>> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
- block_truncate_page(inode->i_mapping, inode->i_size, ext2_get_block);
+ if (test_opt(inode->i_sb, NOBH))
+ nobh_truncate_page(inode->i_mapping, inode->i_size);
+ else
+ block_truncate_page(inode->i_mapping,
+ inode->i_size, ext2_get_block);
n = ext2_block_to_path(inode, iblock, offsets, NULL);
if (n == 0)
if (S_ISREG(inode->i_mode)) {
inode->i_op = &ext2_file_inode_operations;
inode->i_fop = &ext2_file_operations;
- inode->i_mapping->a_ops = &ext2_aops;
+ if (test_opt(inode->i_sb, NOBH))
+ inode->i_mapping->a_ops = &ext2_nobh_aops;
+ else
+ inode->i_mapping->a_ops = &ext2_aops;
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = &ext2_dir_inode_operations;
inode->i_fop = &ext2_dir_operations;
- inode->i_mapping->a_ops = &ext2_aops;
+ if (test_opt(inode->i_sb, NOBH))
+ inode->i_mapping->a_ops = &ext2_nobh_aops;
+ else
+ inode->i_mapping->a_ops = &ext2_aops;
} else if (S_ISLNK(inode->i_mode)) {
if (ext2_inode_is_fast_symlink(inode))
inode->i_op = &ext2_fast_symlink_inode_operations;
else {
inode->i_op = &ext2_symlink_inode_operations;
- inode->i_mapping->a_ops = &ext2_aops;
+ if (test_opt(inode->i_sb, NOBH))
+ inode->i_mapping->a_ops = &ext2_nobh_aops;
+ else
+ inode->i_mapping->a_ops = &ext2_aops;
}
} else {
inode->i_op = &ext2_special_inode_operations;
if (!IS_ERR(inode)) {
inode->i_op = &ext2_file_inode_operations;
inode->i_fop = &ext2_file_operations;
- inode->i_mapping->a_ops = &ext2_aops;
+ if (test_opt(inode->i_sb, NOBH))
+ inode->i_mapping->a_ops = &ext2_nobh_aops;
+ else
+ inode->i_mapping->a_ops = &ext2_aops;
mark_inode_dirty(inode);
err = ext2_add_nondir(dentry, inode);
}
if (l > sizeof (EXT2_I(inode)->i_data)) {
/* slow symlink */
inode->i_op = &ext2_symlink_inode_operations;
- inode->i_mapping->a_ops = &ext2_aops;
+ if (test_opt(inode->i_sb, NOBH))
+ inode->i_mapping->a_ops = &ext2_nobh_aops;
+ else
+ inode->i_mapping->a_ops = &ext2_aops;
err = page_symlink(inode, symname, l);
if (err)
goto out_fail;
inode->i_op = &ext2_dir_inode_operations;
inode->i_fop = &ext2_dir_operations;
- inode->i_mapping->a_ops = &ext2_aops;
+ if (test_opt(inode->i_sb, NOBH))
+ inode->i_mapping->a_ops = &ext2_nobh_aops;
+ else
+ inode->i_mapping->a_ops = &ext2_aops;
ext2_inc_count(inode);
set_opt (sbi->s_mount_opt, OLDALLOC);
else if (!strcmp (this_char, "orlov"))
clear_opt (sbi->s_mount_opt, OLDALLOC);
+ else if (!strcmp (this_char, "nobh"))
+ set_opt(sbi->s_mount_opt, NOBH);
/* Silently ignore the quota options */
else if (!strcmp (this_char, "grpquota")
|| !strcmp (this_char, "noquota")
struct block_device *bdev = NULL;
struct buffer_head bh;
int length;
+ int fully_mapped = 1;
if (page_has_buffers(page))
goto confused;
}
if (!buffer_mapped(&bh)) {
+ fully_mapped = 0;
if (first_hole == blocks_per_page)
first_hole = page_block;
continue;
unlock_page(page);
goto out;
}
+ } else if (fully_mapped) {
+ SetPageMappedToDisk(page);
}
/*
int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
int block_truncate_page(struct address_space *, loff_t, get_block_t *);
int file_fsync(struct file *, struct dentry *, int);
+int nobh_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
+int nobh_commit_write(struct file *, struct page *, unsigned, unsigned);
+int nobh_truncate_page(struct address_space *, loff_t);
#define OSYNC_METADATA (1<<0)
#define OSYNC_DATA (1<<1)
#define EXT2_MOUNT_ERRORS_RO 0x0020 /* Remount fs ro on errors */
#define EXT2_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */
#define EXT2_MOUNT_MINIX_DF 0x0080 /* Mimics the Minix statfs */
+#define EXT2_MOUNT_NOBH 0x0100 /* No buffer_heads */
#define EXT2_MOUNT_NO_UID32 0x0200 /* Disable 32-bit UIDs */
#define EXT2_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
#define EXT2_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */
#define PG_chainlock 15 /* lock bit for ->pte_chain */
#define PG_direct 16 /* ->pte_chain points directly at pte */
+#define PG_mappedtodisk 17 /* Has blocks allocated on-disk */
/*
* Global page accounting. One instance per CPU. Only unsigned longs are
#define ClearPageDirect(page) clear_bit(PG_direct, &(page)->flags)
#define TestClearPageDirect(page) test_and_clear_bit(PG_direct, &(page)->flags)
+#define PageMappedToDisk(page) test_bit(PG_mappedtodisk, &(page)->flags)
+#define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags)
+#define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags)
+
/*
* The PageSwapCache predicate doesn't use a PG_flag at this time,
* but it may again do so one day.
page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
1 << PG_referenced | 1 << PG_arch_1 |
- 1 << PG_checked);
+ 1 << PG_checked | 1 << PG_mappedtodisk);
set_page_refs(page, order);
}
clear_page_dirty(page);
ClearPageUptodate(page);
+ ClearPageMappedToDisk(page);
remove_from_page_cache(page);
page_cache_release(page); /* pagecache ref */
}