From f6506d6829c24a25c39a9c3b3bac4bea569dffbf Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 23 Nov 2007 15:25:28 -0500 Subject: [PATCH] Import 2.3.7pre3 --- drivers/char/tty_io.c | 7 +- drivers/usb/uhci.c | 3 +- fs/buffer.c | 399 +++++++++++++++++++++------------------ fs/ext2/file.c | 29 +-- fs/inode.c | 4 - fs/minix/bitmap.c | 8 - fs/minix/file.c | 120 +++++------- fs/minix/inode.c | 290 ++++++++++++++++++++-------- fs/minix/truncate.c | 11 +- fs/nfs/dir.c | 257 ++++++++++++++++--------- fs/nfs/inode.c | 35 ++-- fs/pipe.c | 8 +- fs/read_write.c | 7 - fs/sysv/file.c | 261 ++++++------------------- fs/sysv/inode.c | 153 ++++++++++----- fs/sysv/truncate.c | 13 +- fs/ufs/file.c | 259 +++++++++---------------- fs/ufs/inode.c | 169 +++++++++++++---- fs/ufs/truncate.c | 11 +- include/linux/fs.h | 19 +- include/linux/minix_fs.h | 1 + include/linux/nfs_fs.h | 3 +- include/linux/sysv_fs.h | 1 + include/linux/ufs_fs.h | 1 + mm/filemap.c | 93 ++++----- 25 files changed, 1116 insertions(+), 1046 deletions(-) diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 3e11bf7d60d5..33497880186c 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -651,9 +651,7 @@ static inline ssize_t do_tty_write( ssize_t ret = 0, written = 0; struct inode *inode = file->f_dentry->d_inode; - up(&inode->i_sem); - if (down_interruptible(&inode->i_atomic_write)) { - down(&inode->i_sem); + if (down_interruptible(&inode->i_sem)) { return -ERESTARTSYS; } for (;;) { @@ -678,8 +676,7 @@ static inline ssize_t do_tty_write( file->f_dentry->d_inode->i_mtime = CURRENT_TIME; ret = written; } - up(&inode->i_atomic_write); - down(&inode->i_sem); + up(&inode->i_sem); return ret; } diff --git a/drivers/usb/uhci.c b/drivers/usb/uhci.c index 73aab5fa1013..2f8010ed1c90 100644 --- a/drivers/usb/uhci.c +++ b/drivers/usb/uhci.c @@ -1264,7 +1264,8 @@ static void uhci_interrupt_notify(struct uhci *uhci) struct uhci_qh *interrupt_qh = td->qh; usb_dotoggle(td->dev, usb_pipeendpoint(td->info)); - td->info |= 1 << 19; /* toggle between data0 and data1 */ + td->info &= ~(1 << 19); /* clear data toggle */ + td->info |= usb_gettoggle(td->dev, usb_pipeendpoint(td->info)) << 19; /* toggle between data0 and data1 */ td->status = (td->status & 0x2f000000) | (1 << 23) | (1 << 24); /* active */ /* Remove then readd? Is that necessary */ diff --git a/fs/buffer.c b/fs/buffer.c index 9ea490ef43a5..0f5302a7d2bc 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -109,7 +109,7 @@ union bdflush_param { int dummy3; /* unused */ } b_un; unsigned int data[N_PARAM]; -} bdf_prm = {{90, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}}; +} bdf_prm = {{100, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}}; /* These are the min and max parameter values that we will allow to be assigned */ int bdflush_min[N_PARAM] = { 0, 10, 5, 25, 0, 1*HZ, 1*HZ, 1, 1}; @@ -422,6 +422,24 @@ void invalidate_buffers(kdev_t dev) #define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block)) & bh_hash_mask) #define hash(dev,block) hash_table[_hashfn(dev,block)] +static void insert_into_hash_list(struct buffer_head * bh) +{ + bh->b_next = NULL; + bh->b_pprev = NULL; + if (bh->b_dev) { + struct buffer_head **bhp = &hash(bh->b_dev, bh->b_blocknr); + struct buffer_head *next = *bhp; + + if (next) { + bh->b_next = next; + next->b_pprev = &bh->b_next; + } + *bhp = bh; + bh->b_pprev = bhp; + nr_hashed_buffers++; + } +} + static inline void remove_from_hash_queue(struct buffer_head * bh) { struct buffer_head **pprev = bh->b_pprev; @@ -433,16 +451,36 @@ static inline void remove_from_hash_queue(struct buffer_head * bh) } *pprev = next; bh->b_pprev = NULL; + nr_hashed_buffers--; } - nr_hashed_buffers--; } -static inline void remove_from_lru_list(struct buffer_head * bh) +static void insert_into_lru_list(struct buffer_head * bh) { - if (!(bh->b_prev_free) || !(bh->b_next_free)) { - printk("VFS: LRU block list corrupted\n"); - *(int*)0 = 0; + struct buffer_head **bhp = &lru_list[bh->b_list]; + + if(!*bhp) { + *bhp = bh; + bh->b_prev_free = bh; } + + if (bh->b_next_free) + panic("VFS: buffer LRU pointers corrupted"); + + bh->b_next_free = *bhp; + bh->b_prev_free = (*bhp)->b_prev_free; + (*bhp)->b_prev_free->b_next_free = bh; + (*bhp)->b_prev_free = bh; + + nr_buffers++; + nr_buffers_type[bh->b_list]++; +} + +static inline void remove_from_lru_list(struct buffer_head * bh) +{ + if (!(bh->b_prev_free) || !(bh->b_next_free)) + return; + if (bh->b_dev == B_FREE) { printk("LRU list corrupted"); *(int*)0 = 0; @@ -455,6 +493,9 @@ static inline void remove_from_lru_list(struct buffer_head * bh) if (lru_list[bh->b_list] == bh) lru_list[bh->b_list] = NULL; bh->b_next_free = bh->b_prev_free = NULL; + + nr_buffers--; + nr_buffers_type[bh->b_list]--; } static inline void remove_from_free_list(struct buffer_head * bh) @@ -479,15 +520,8 @@ static inline void remove_from_free_list(struct buffer_head * bh) static void remove_from_queues(struct buffer_head * bh) { - if(bh->b_dev == B_FREE) { - remove_from_free_list(bh); /* Free list entries should not be - in the hash queue */ - goto out; - } - nr_buffers_type[bh->b_list]--; remove_from_hash_queue(bh); remove_from_lru_list(bh); -out: } static inline void put_last_free(struct buffer_head * bh) @@ -510,69 +544,6 @@ static inline void put_last_free(struct buffer_head * bh) } } -static void insert_into_queues(struct buffer_head * bh) -{ - /* put at end of free list */ - if(bh->b_dev == B_FREE) { - put_last_free(bh); - } else { - struct buffer_head **bhp = &lru_list[bh->b_list]; - - if(!*bhp) { - *bhp = bh; - bh->b_prev_free = bh; - } - - if (bh->b_next_free) - panic("VFS: buffer LRU pointers corrupted"); - - bh->b_next_free = *bhp; - bh->b_prev_free = (*bhp)->b_prev_free; - (*bhp)->b_prev_free->b_next_free = bh; - (*bhp)->b_prev_free = bh; - - nr_buffers_type[bh->b_list]++; - - /* Put the buffer in new hash-queue if it has a device. */ - bh->b_next = NULL; - bh->b_pprev = NULL; - if (bh->b_dev) { - struct buffer_head **bhp = &hash(bh->b_dev, bh->b_blocknr); - struct buffer_head *next = *bhp; - - if (next) { - bh->b_next = next; - next->b_pprev = &bh->b_next; - } - *bhp = bh; - bh->b_pprev = bhp; - } - nr_hashed_buffers++; - } -} - -static void insert_into_dirty_queue(struct buffer_head * bh) -{ - struct buffer_head **bhp; - - - bhp = &lru_list[BUF_DIRTY]; - if(!*bhp) { - *bhp = bh; - bh->b_prev_free = bh; - } - if (bh->b_next_free) - BUG(); - - bh->b_next_free = *bhp; - bh->b_prev_free = (*bhp)->b_prev_free; - (*bhp)->b_prev_free->b_next_free = bh; - (*bhp)->b_prev_free = bh; - - nr_buffers++; - nr_buffers_type[BUF_DIRTY]++; -} - struct buffer_head * find_buffer(kdev_t dev, int block, int size) { struct buffer_head * next; @@ -673,7 +644,7 @@ void set_blocksize(kdev_t dev, int size) } remove_from_queues(bh); bh->b_dev=B_FREE; - insert_into_queues(bh); + put_last_free(bh); } } } @@ -693,7 +664,6 @@ static void refill_freelist(int size) void init_buffer(struct buffer_head *bh, kdev_t dev, int block, bh_end_io_t *handler, void *dev_id) { - bh->b_count = 1; bh->b_list = BUF_CLEAN; bh->b_flushtime = 0; bh->b_dev = dev; @@ -743,8 +713,12 @@ get_free: * and that it's unused (b_count=0), unlocked, and clean. */ init_buffer(bh, dev, block, end_buffer_io_sync, NULL); - bh->b_state=0; - insert_into_queues(bh); + bh->b_count = 1; + bh->b_state = 0; + + /* Insert the buffer into the regular lists */ + insert_into_lru_list(bh); + insert_into_hash_list(bh); goto out; /* @@ -781,9 +755,9 @@ void set_writetime(struct buffer_head * buf, int flag) */ static void file_buffer(struct buffer_head *bh, int list) { - remove_from_queues(bh); + remove_from_lru_list(bh); bh->b_list = list; - insert_into_queues(bh); + insert_into_lru_list(bh); } /* @@ -813,7 +787,7 @@ static inline void balance_dirty (kdev_t dev) * A buffer may need to be moved from one buffer list to another * (e.g. in case it is not shared any more). Handle this. */ -void __refile_buffer(struct buffer_head * buf) +void refile_buffer(struct buffer_head * buf) { int dispose; @@ -829,7 +803,7 @@ void __refile_buffer(struct buffer_head * buf) dispose = BUF_CLEAN; if(dispose != buf->b_list) { file_buffer(buf, dispose); - if(dispose == BUF_DIRTY) + if (dispose == BUF_DIRTY) balance_dirty(buf->b_dev); } } @@ -1282,89 +1256,143 @@ static int create_page_buffers (int rw, struct page *page, kdev_t dev, int b[], } tail->b_this_page = head; page->buffers = head; + get_page(page); return 0; } /* - * Can the buffer be thrown out? + * We don't have to release all buffers here, but + * we have to be sure that no dirty buffer is left + * and no IO is going on (no buffer is locked), because + * we have truncated the file and are going to free the + * blocks on-disk.. */ -#define BUFFER_BUSY_BITS ((1<b_count || ((bh)->b_state & BUFFER_BUSY_BITS)) - -static int page_idle(struct page *page, int sync) +int generic_block_flushpage(struct inode *inode, struct page *page, unsigned long offset) { struct buffer_head *head, *bh, *next; + unsigned int curr_off = 0; + + + if (!PageLocked(page)) + BUG(); + if (!page->buffers) + return 0; head = page->buffers; bh = head; do { + unsigned int next_off = curr_off + bh->b_size; next = bh->b_this_page; - if (bh->b_blocknr) { - if (buffer_locked(bh)) { - wait_on_buffer(bh); - return 0; - } - if (buffer_dirty(bh)) { - if (sync) { + /* + * is this block fully flushed? + */ + if (offset <= curr_off) { + if (bh->b_blocknr) { + if (buffer_locked(bh)) wait_on_buffer(bh); - ll_rw_block(WRITE, 1, &bh); - return 0; - } else - clear_bit(BH_Dirty, &bh->b_state); + clear_bit(BH_Dirty, &bh->b_state); + if(bh->b_dev == B_FREE) + BUG(); + remove_from_lru_list(bh); + bh->b_blocknr = 0; } } + curr_off = next_off; bh = next; } while (bh != head); - return 1; + + /* + * subtle. We release buffer-heads only if this is + * the 'final' flushpage. We invalidate the bmap + * cached value in all cases. + */ + if (!offset) { + buffermem += PAGE_SIZE; + try_to_free_buffers(page); + } + + return 0; } -/* - * We dont have to release all buffers here, but - * we have to be sure that no dirty buffer is left - * and no IO is going on (no buffer is locked), because - * we are going to free the underlying page. - */ -int generic_block_flushpage(struct inode *inode, struct page *page, int sync) +static inline void create_empty_buffers (struct page *page, + struct inode *inode, unsigned long blocksize) { - struct buffer_head *head, *bh, *next; + struct buffer_head *bh, *head, *tail; + head = create_buffers(page_address(page), blocksize, 1); + if (page->buffers) + BUG(); + + bh = head; + do { + bh->b_dev = inode->i_dev; + tail = bh; + bh = bh->b_this_page; + } while (bh); + tail->b_this_page = head; + page->buffers = head; +} + +int block_write_full_page (struct file *file, struct page *page, fs_getblock_t fs_get_block) +{ + struct dentry *dentry = file->f_dentry; + struct inode *inode = dentry->d_inode; + int err, created, i; + unsigned long block, phys, offset; + struct buffer_head *bh, *head; if (!PageLocked(page)) BUG(); + if (!page->buffers) - BUG(); + create_empty_buffers(page, inode, inode->i_sb->s_blocksize); + head = page->buffers; - while (!page_idle(page, sync)); + offset = page->offset; + block = offset >> inode->i_sb->s_blocksize_bits; + + // FIXME: currently we assume page alignment. + if (offset & (PAGE_SIZE-1)) + BUG(); - head = page->buffers; bh = head; + i = 0; do { - next = bh->b_this_page; - if (bh->b_blocknr) { - if(bh->b_dev == B_FREE) { - remove_from_free_list(bh); - } else { - if (bh->b_list == BUF_DIRTY) { - nr_buffers--; - nr_buffers_type[BUF_DIRTY]--; - remove_from_lru_list(bh); - } - } + if (!bh) + BUG(); + + if (!bh->b_blocknr) { + err = -EIO; + down(&inode->i_sem); + phys = fs_get_block (inode, block, 1, &err, &created); + up(&inode->i_sem); + if (!phys) + goto out; + + init_buffer(bh, inode->i_dev, phys, end_buffer_io_sync, NULL); + bh->b_state = (1<b_end_io = end_buffer_io_sync; + set_bit(BH_Uptodate, &bh->b_state); } - bh->b_state = 0; - bh->b_count = 0; - put_unused_buffer_head(bh); - bh = next; + mark_buffer_dirty(bh, 0); + + bh = bh->b_this_page; + block++; } while (bh != head); - page->buffers = NULL; + SetPageUptodate(page); return 0; +out: + ClearPageUptodate(page); + return err; } - -long block_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf, fs_getblock_t fs_get_block) +int block_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf, fs_getblock_t fs_get_block) { struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; @@ -1373,7 +1401,7 @@ long block_write_one_page (struct file *file, struct page *page, unsigned long o unsigned long blocksize, start_block, end_block; unsigned long start_offset, start_bytes, end_bytes; unsigned long bbits, phys, blocks, i, len; - struct buffer_head *bh; + struct buffer_head *bh, *head; char * target_buf; target_buf = (char *)page_address(page) + offset; @@ -1383,22 +1411,9 @@ long block_write_one_page (struct file *file, struct page *page, unsigned long o BUG(); blocksize = inode->i_sb->s_blocksize; - if (!page->buffers) { - struct buffer_head *head, *tail; - - head = create_buffers(page_address(page), blocksize, 1); - if (page->buffers) - BUG(); - - bh = head; - do { - bh->b_dev = inode->i_dev; - tail = bh; - bh = bh->b_this_page; - } while (bh); - tail->b_this_page = head; - page->buffers = head; - } + if (!page->buffers) + create_empty_buffers(page, inode, blocksize); + head = page->buffers; bbits = inode->i_sb->s_blocksize_bits; block = page->offset >> bbits; @@ -1425,8 +1440,8 @@ long block_write_one_page (struct file *file, struct page *page, unsigned long o if (page->offset & (PAGE_SIZE-1)) BUG(); - bh = page->buffers; i = 0; + bh = head; do { if (!bh) BUG(); @@ -1434,9 +1449,38 @@ long block_write_one_page (struct file *file, struct page *page, unsigned long o if ((i < start_block) || (i > end_block)) { goto skip; } + unlock_kernel(); + + err = -EFAULT; + if (start_offset) { + len = start_bytes; + start_offset = 0; + } else + if (end_bytes && (i == end_block)) { + len = end_bytes; + end_bytes = 0; + } else { + /* + * Overwritten block. + */ + len = blocksize; + } + if (copy_from_user(target_buf, buf, len)) + goto out_nolock; + target_buf += len; + buf += len; + + /* + * we dirty buffers only after copying the data into + * the page - this way we can dirty the buffer even if + * the bh is still doing IO. + */ + lock_kernel(); if (!bh->b_blocknr) { - phys = fs_get_block (inode, block, 1, &err, &created); err = -EIO; + down(&inode->i_sem); + phys = fs_get_block (inode, block, 1, &err, &created); + up(&inode->i_sem); if (!phys) goto out; @@ -1458,48 +1502,20 @@ long block_write_one_page (struct file *file, struct page *page, unsigned long o lock_kernel(); init_buffer(bh, inode->i_dev, phys, end_buffer_io_sync, NULL); - bh->b_state = (1<b_list = BUF_DIRTY; - insert_into_dirty_queue(bh); + bh->b_state = (1<b_end_io = end_buffer_io_sync; - set_bit(BH_Dirty, &bh->b_state); set_bit(BH_Uptodate, &bh->b_state); } - unlock_kernel(); - - err = -EFAULT; - if (start_offset) { - len = start_bytes; - start_offset = 0; - } else - if (end_bytes && (i == end_block)) { - len = end_bytes; - end_bytes = 0; - } else { - /* - * Overwritten block. - */ - len = blocksize; - } - if (copy_from_user(target_buf, buf, len)) - goto out_nolock; - target_buf += len; - buf += len; - - lock_kernel(); - if (bh->b_list != BUF_DIRTY) { - bh->b_list = BUF_DIRTY; - insert_into_dirty_queue(bh); - } + mark_buffer_dirty(bh, 0); skip: i++; block++; bh = bh->b_this_page; - } while (i < blocks); + } while (bh != head); unlock_kernel(); SetPageUptodate(page); @@ -1545,7 +1561,7 @@ int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size, int bmap) do { block = *(b++); - if (fresh && (bh->b_count != 1)) + if (fresh && (bh->b_count != 0)) BUG(); if (rw == READ) { if (!fresh) @@ -1569,12 +1585,8 @@ int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size, int bmap) if (!block) BUG(); } - set_bit(BH_Dirty, &bh->b_state); set_bit(BH_Uptodate, &bh->b_state); - if (bh->b_list != BUF_DIRTY) { - bh->b_list = BUF_DIRTY; - insert_into_dirty_queue(bh); - } + mark_buffer_dirty(bh, 0); arr[nr++] = bh; } bh = bh->b_this_page; @@ -1701,7 +1713,6 @@ static int grow_buffers(int size) tmp->b_next_free = tmp; } insert_point = tmp; - nr_buffers++; if (tmp->b_this_page) tmp = tmp->b_this_page; else @@ -1714,6 +1725,12 @@ static int grow_buffers(int size) return 1; } +/* + * Can the buffer be thrown out? + */ +#define BUFFER_BUSY_BITS ((1<b_count || ((bh)->b_state & BUFFER_BUSY_BITS)) + /* * try_to_free_buffers() checks if all the buffers on this particular page * are unused, and free's the page if so. @@ -1732,6 +1749,13 @@ int try_to_free_buffers(struct page * page) tmp = tmp->b_this_page; if (!buffer_busy(p)) continue; +{ + static int count = 30; + if (count) { + count--; + printk("bh %p (%04x:%ld): count=%d, state=0x%04x\n", p, p->b_dev, p->b_blocknr, p->b_count, p->b_state); + } +} wakeup_bdflush(0); return 0; @@ -1741,7 +1765,6 @@ int try_to_free_buffers(struct page * page) do { struct buffer_head * p = tmp; tmp = tmp->b_this_page; - nr_buffers--; remove_from_queues(p); put_unused_buffer_head(p); } while (tmp != bh); diff --git a/fs/ext2/file.c b/fs/ext2/file.c index dd4c5b38a352..806859ba0be2 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -108,34 +108,7 @@ static inline void remove_suid(struct inode *inode) static int ext2_writepage (struct file * file, struct page * page) { - struct dentry *dentry = file->f_dentry; - struct inode *inode = dentry->d_inode; - unsigned long block; - int *p, nr[PAGE_SIZE/512]; - int i, err, created; - struct buffer_head *bh; - - i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits; - block = page->offset >> inode->i_sb->s_blocksize_bits; - p = nr; - bh = page->buffers; - do { - if (bh && bh->b_blocknr) - *p = bh->b_blocknr; - else - *p = ext2_getblk_block (inode, block, 1, &err, &created); - if (!*p) - return -EIO; - i--; - block++; - p++; - if (bh) - bh = bh->b_this_page; - } while (i > 0); - - /* IO start */ - brw_page(WRITE, page, inode->i_dev, nr, inode->i_sb->s_blocksize, 1); - return 0; + return block_write_full_page(file, page, ext2_getblk_block); } static long ext2_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf) diff --git a/fs/inode.c b/fs/inode.c index ee8602939f1a..8b268dd41792 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -130,7 +130,6 @@ static inline void init_once(struct inode * inode) INIT_LIST_HEAD(&inode->i_hash); INIT_LIST_HEAD(&inode->i_dentry); sema_init(&inode->i_sem, 1); - sema_init(&inode->i_atomic_write, 1); } static inline void write_inode(struct inode *inode) @@ -767,9 +766,6 @@ kdevname(inode->i_dev), inode->i_ino, inode->i_count); if (atomic_read(&inode->i_sem.count) != 1) printk(KERN_ERR "iput: Aieee, semaphore in use inode %s/%ld, count=%d\n", kdevname(inode->i_dev), inode->i_ino, atomic_read(&inode->i_sem.count)); -if (atomic_read(&inode->i_atomic_write.count) != 1) -printk(KERN_ERR "iput: Aieee, atomic write semaphore in use inode %s/%ld, count=%d\n", -kdevname(inode->i_dev), inode->i_ino, atomic_read(&inode->i_sem.count)); #endif } if (inode->i_count > (1<<31)) { diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index 6e8930c70691..8c396f3e63a0 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c @@ -112,14 +112,6 @@ repeat: if (j < sb->u.minix_sb.s_firstdatazone || j >= sb->u.minix_sb.s_nzones) return 0; - if (!(bh = getblk(sb->s_dev,j,BLOCK_SIZE))) { - printk("new_block: cannot get block"); - return 0; - } - memset(bh->b_data, 0, BLOCK_SIZE); - mark_buffer_uptodate(bh, 1); - mark_buffer_dirty(bh, 1); - brelse(bh); return j; } diff --git a/fs/minix/file.c b/fs/minix/file.c index f6ddda02140c..55ed5fd5d93b 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -27,7 +27,51 @@ #include #include -static ssize_t minix_file_write(struct file *, const char *, size_t, loff_t *); +static int minix_writepage(struct file *file, struct page *page) +{ + struct dentry *dentry = file->f_dentry; + struct inode *inode = dentry->d_inode; + unsigned long block; + int *p, nr[PAGE_SIZE/BLOCK_SIZE]; + int i, err, created; + struct buffer_head *bh; + + i = PAGE_SIZE / BLOCK_SIZE; + block = page->offset / BLOCK_SIZE; + p = nr; + bh = page->buffers; + do { + if (bh && bh->b_blocknr) + *p = bh->b_blocknr; + else + *p = minix_getblk_block(inode, block, 1, &err, &created); + if (!*p) + return -EIO; + i--; + block++; + p++; + if (bh) + bh = bh->b_this_page; + } while(i > 0); + + /* IO start */ + brw_page(WRITE, page, inode->i_dev, nr, BLOCK_SIZE, 1); + return 0; +} + +static long minix_write_one_page(struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char *buf) +{ + return block_write_one_page(file, page, offset, bytes, buf, minix_getblk_block); +} + +/* + * Write to a file (through the page cache). + */ +static ssize_t +minix_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) +{ + return generic_file_write(file, buf, count, ppos, minix_write_one_page); +} /* * We have mostly NULLs here: the current defaults are OK for @@ -61,74 +105,12 @@ struct inode_operations minix_file_inode_operations = { NULL, /* readlink */ NULL, /* follow_link */ generic_readpage, /* readpage */ - NULL, /* writepage */ + minix_writepage, /* writepage */ minix_bmap, /* bmap */ minix_truncate, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL, /* updatepage */ + NULL, /* revalidate */ + generic_block_flushpage,/* flushpage */ }; - -static ssize_t minix_file_write(struct file * filp, const char * buf, - size_t count, loff_t *ppos) -{ - struct inode * inode = filp->f_dentry->d_inode; - off_t pos; - ssize_t written, c; - struct buffer_head * bh; - char * p; - - if (!inode) { - printk("minix_file_write: inode = NULL\n"); - return -EINVAL; - } - if (!S_ISREG(inode->i_mode)) { - printk("minix_file_write: mode = %07o\n",inode->i_mode); - return -EINVAL; - } - if (filp->f_flags & O_APPEND) - pos = inode->i_size; - else - pos = *ppos; - written = 0; - while (written < count) { - bh = minix_getblk(inode,pos/BLOCK_SIZE,1); - if (!bh) { - if (!written) - written = -ENOSPC; - break; - } - c = BLOCK_SIZE - (pos % BLOCK_SIZE); - if (c > count-written) - c = count-written; - if (c != BLOCK_SIZE && !buffer_uptodate(bh)) { - ll_rw_block(READ, 1, &bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - brelse(bh); - if (!written) - written = -EIO; - break; - } - } - p = (pos % BLOCK_SIZE) + bh->b_data; - c -= copy_from_user(p,buf,c); - if (!c) { - brelse(bh); - if (!written) - written = -EFAULT; - break; - } - update_vm_cache(inode, pos, p, c); - mark_buffer_uptodate(bh, 1); - mark_buffer_dirty(bh, 0); - brelse(bh); - pos += c; - written += c; - buf += c; - } - if (pos > inode->i_size) - inode->i_size = pos; - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - *ppos = pos; - mark_inode_dirty(inode); - return written; -} diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 5a29c53e0a83..088de42dcbd2 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -407,7 +407,7 @@ static int V2_block_bmap(struct buffer_head * bh, int nr) return tmp; } -static int V2_minix_bmap(struct inode * inode,int block) +static int V2_minix_bmap(struct inode * inode, int block) { int i; @@ -454,7 +454,7 @@ static int V2_minix_bmap(struct inode * inode,int block) /* * The global minix fs bmap function. */ -int minix_bmap(struct inode * inode,int block) +int minix_bmap(struct inode * inode, int block) { if (INODE_VERSION(inode) == MINIX_V1) return V1_minix_bmap(inode, block); @@ -465,8 +465,8 @@ int minix_bmap(struct inode * inode,int block) /* * The minix V1 fs getblk functions. */ -static struct buffer_head * V1_inode_getblk(struct inode * inode, int nr, - int create) +static struct buffer_head * V1_inode_getblk(struct inode * inode, int nr, int create, + int metadata, int *phys_block, int *created) { int tmp; unsigned short *p; @@ -476,31 +476,51 @@ static struct buffer_head * V1_inode_getblk(struct inode * inode, int nr, repeat: tmp = *p; if (tmp) { - result = getblk(inode->i_dev, tmp, BLOCK_SIZE); - if (tmp == *p) - return result; - brelse(result); - goto repeat; + if (metadata) { + result = getblk(inode->i_dev, tmp, BLOCK_SIZE); + if (tmp == *p) + return result; + brelse(result); + goto repeat; + } else { + *phys_block = tmp; + return NULL; + } } if (!create) return NULL; tmp = minix_new_block(inode->i_sb); if (!tmp) return NULL; - result = getblk(inode->i_dev, tmp, BLOCK_SIZE); - if (*p) { - minix_free_block(inode->i_sb,tmp); - brelse(result); - goto repeat; + if (metadata) { + result = getblk(inode->i_dev, tmp, BLOCK_SIZE); + if (*p) { + minix_free_block(inode->i_sb, tmp); + brelse(result); + goto repeat; + } + memset(result->b_data, 0, BLOCK_SIZE); + mark_buffer_uptodate(result, 1); + mark_buffer_dirty(result, 1); + } else { + if (*p) { + minix_free_block(inode->i_sb, tmp); + goto repeat; + } + *phys_block = tmp; + result = NULL; + *created = 1; } *p = tmp; + inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); return result; } static struct buffer_head * V1_block_getblk(struct inode * inode, - struct buffer_head * bh, int nr, int create) + struct buffer_head * bh, int nr, int create, + int metadata, int *phys_block, int *created) { int tmp; unsigned short *p; @@ -520,13 +540,19 @@ static struct buffer_head * V1_block_getblk(struct inode * inode, repeat: tmp = *p; if (tmp) { - result = getblk(bh->b_dev, tmp, BLOCK_SIZE); - if (tmp == *p) { + if (metadata) { + result = getblk(bh->b_dev, tmp, BLOCK_SIZE); + if (tmp == *p) { + brelse(bh); + return result; + } + brelse(result); + goto repeat; + } else { + *phys_block = tmp; brelse(bh); - return result; + return NULL; } - brelse(result); - goto repeat; } if (!create) { brelse(bh); @@ -537,49 +563,74 @@ repeat: brelse(bh); return NULL; } - result = getblk(bh->b_dev, tmp, BLOCK_SIZE); - if (*p) { - minix_free_block(inode->i_sb,tmp); - brelse(result); - goto repeat; + if (metadata) { + result = getblk(bh->b_dev, tmp, BLOCK_SIZE); + if (*p) { + minix_free_block(inode->i_sb, tmp); + brelse(result); + goto repeat; + } + memset(result->b_data, 0, BLOCK_SIZE); + mark_buffer_uptodate(result, 1); + mark_buffer_dirty(result, 1); + } else { + if (*p) { + minix_free_block(inode->i_sb, tmp); + goto repeat; + } + *phys_block = tmp; + result = NULL; + *created = 1; } + *p = tmp; mark_buffer_dirty(bh, 1); brelse(bh); return result; } -static struct buffer_head * V1_minix_getblk(struct inode * inode, int block, - int create) +int V1_getblk_block(struct inode * inode, long block, int create, int *err, int *created) { - struct buffer_head * bh; + struct buffer_head *bh, *tmp; + int phys_block; - if (block<0) { + *err = -EIO; + if (block < 0) { printk("minix_getblk: block<0"); - return NULL; + return 0; } if (block >= inode->i_sb->u.minix_sb.s_max_size/BLOCK_SIZE) { printk("minix_getblk: block>big"); - return NULL; + return 0; + } + *created = 0; + if (block < 7) { + tmp = V1_inode_getblk(inode, block, create, + 0, &phys_block, created); + goto out; } - if (block < 7) - return V1_inode_getblk(inode,block,create); block -= 7; if (block < 512) { - bh = V1_inode_getblk(inode,7,create); - return V1_block_getblk(inode, bh, block, create); + bh = V1_inode_getblk(inode, 7, create, 1, NULL, NULL); + tmp = V1_block_getblk(inode, bh, block, create, + 0, &phys_block, created); + goto out; } block -= 512; - bh = V1_inode_getblk(inode,8,create); - bh = V1_block_getblk(inode, bh, (block>>9) & 511, create); - return V1_block_getblk(inode, bh, block & 511, create); + bh = V1_inode_getblk(inode, 8, create, 1, NULL, NULL); + bh = V1_block_getblk(inode, bh, (block>>9) & 511, create, 1, NULL, NULL); + tmp = V1_block_getblk(inode, bh, block & 511, create, 0, &phys_block, created); + +out: + *err = 0; + return phys_block; } /* * The minix V2 fs getblk functions. */ -static struct buffer_head * V2_inode_getblk(struct inode * inode, int nr, - int create) +static struct buffer_head * V2_inode_getblk(struct inode * inode, int nr, int create, + int metadata, int *phys_block, int *created) { int tmp; unsigned long *p; @@ -589,31 +640,51 @@ static struct buffer_head * V2_inode_getblk(struct inode * inode, int nr, repeat: tmp = *p; if (tmp) { - result = getblk(inode->i_dev, tmp, BLOCK_SIZE); - if (tmp == *p) - return result; - brelse(result); - goto repeat; + if (metadata) { + result = getblk(inode->i_dev, tmp, BLOCK_SIZE); + if (tmp == *p) + return result; + brelse(result); + goto repeat; + } else { + *phys_block = tmp; + return NULL; + } } if (!create) return NULL; tmp = minix_new_block(inode->i_sb); if (!tmp) return NULL; - result = getblk(inode->i_dev, tmp, BLOCK_SIZE); - if (*p) { - minix_free_block(inode->i_sb,tmp); - brelse(result); - goto repeat; + if (metadata) { + result = getblk(inode->i_dev, tmp, BLOCK_SIZE); + if (*p) { + minix_free_block(inode->i_sb, tmp); + brelse(result); + goto repeat; + } + memset(result->b_data, 0, BLOCK_SIZE); + mark_buffer_uptodate(result, 1); + mark_buffer_dirty(result, 1); + } else { + if (*p) { + minix_free_block(inode->i_sb, tmp); + goto repeat; + } + *phys_block = tmp; + result = NULL; + *created = 1; } *p = tmp; + inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); return result; } static struct buffer_head * V2_block_getblk(struct inode * inode, - struct buffer_head * bh, int nr, int create) + struct buffer_head * bh, int nr, int create, + int metadata, int *phys_block, int *created) { int tmp; unsigned long *p; @@ -633,13 +704,19 @@ static struct buffer_head * V2_block_getblk(struct inode * inode, repeat: tmp = *p; if (tmp) { - result = getblk(bh->b_dev, tmp, BLOCK_SIZE); - if (tmp == *p) { + if (metadata) { + result = getblk(bh->b_dev, tmp, BLOCK_SIZE); + if (tmp == *p) { + brelse(bh); + return result; + } + brelse(result); + goto repeat; + } else { + *phys_block = tmp; brelse(bh); - return result; + return NULL; } - brelse(result); - goto repeat; } if (!create) { brelse(bh); @@ -650,60 +727,107 @@ repeat: brelse(bh); return NULL; } - result = getblk(bh->b_dev, tmp, BLOCK_SIZE); - if (*p) { - minix_free_block(inode->i_sb,tmp); - brelse(result); - goto repeat; + if (metadata) { + result = getblk(bh->b_dev, tmp, BLOCK_SIZE); + if (*p) { + minix_free_block(inode->i_sb, tmp); + brelse(result); + goto repeat; + } + memset(result->b_data, 0, BLOCK_SIZE); + mark_buffer_uptodate(result, 1); + mark_buffer_dirty(result, 1); + } else { + if (*p) { + minix_free_block(inode->i_sb, tmp); + goto repeat; + } + *phys_block = tmp; + result = NULL; + *created = 1; } + *p = tmp; mark_buffer_dirty(bh, 1); brelse(bh); return result; } -static struct buffer_head * V2_minix_getblk(struct inode * inode, int block, - int create) +int V2_getblk_block(struct inode * inode, int block, int create, int *err, int *created) { - struct buffer_head * bh; + struct buffer_head * bh, *tmp; + int phys_block; - if (block<0) { + *err = -EIO; + if (block < 0) { printk("minix_getblk: block<0"); - return NULL; + return 0; } if (block >= inode->i_sb->u.minix_sb.s_max_size/BLOCK_SIZE) { printk("minix_getblk: block>big"); - return NULL; + return 0; + } + *created = 0; + if (block < 7) { + tmp = V2_inode_getblk(inode, block, create, + 0, &phys_block, created); + goto out; } - if (block < 7) - return V2_inode_getblk(inode,block,create); block -= 7; if (block < 256) { - bh = V2_inode_getblk(inode,7,create); - return V2_block_getblk(inode, bh, block, create); + bh = V2_inode_getblk(inode, 7, create, 1, NULL, NULL); + tmp = V2_block_getblk(inode, bh, block, create, + 0, &phys_block, created); + goto out; } block -= 256; if (block < 256*256) { - bh = V2_inode_getblk(inode,8,create); - bh = V2_block_getblk(inode, bh, (block>>8) & 255, create); - return V2_block_getblk(inode, bh, block & 255, create); + bh = V2_inode_getblk(inode, 8, create, 1, NULL, NULL); + bh = V2_block_getblk(inode, bh, (block>>8) & 255, create, + 1, NULL, NULL); + tmp = V2_block_getblk(inode, bh, block & 255, create, + 0, &phys_block, created); + goto out; } block -= 256*256; - bh = V2_inode_getblk(inode,9,create); - bh = V2_block_getblk(inode, bh, (block >> 16) & 255, create); - bh = V2_block_getblk(inode, bh, (block >> 8) & 255, create); - return V2_block_getblk(inode, bh, block & 255, create); + bh = V2_inode_getblk(inode, 9, create, 1, NULL, NULL); + bh = V2_block_getblk(inode, bh, (block >> 16) & 255, create, 1, NULL, NULL); + bh = V2_block_getblk(inode, bh, (block >> 8) & 255, create, 1, NULL, NULL); + tmp = V2_block_getblk(inode, bh, block & 255, create, 0, &phys_block, created); + +out: + *err = 0; + return phys_block; +} + +int minix_getblk_block (struct inode *inode, long block, + int create, int *err, int *created) +{ + if (INODE_VERSION(inode) == MINIX_V1) + return V1_getblk_block(inode, block, create, err, created); + else + return V2_getblk_block(inode, block, create, err, created); } /* * the global minix fs getblk function. */ -struct buffer_head * minix_getblk(struct inode * inode, int block, int create) +struct buffer_head *minix_getblk (struct inode *inode, int block, int create) { - if (INODE_VERSION(inode) == MINIX_V1) - return V1_minix_getblk(inode,block,create); - else - return V2_minix_getblk(inode,block,create); + struct buffer_head *tmp = NULL; + int phys_block; + int err, created; + + phys_block = minix_getblk_block(inode, block, create, &err, &created); + if (phys_block) { + tmp = getblk(inode->i_dev, phys_block, BLOCK_SIZE); + if (created) { + memset(tmp->b_data, 0, BLOCK_SIZE); + mark_buffer_uptodate(tmp, 1); + mark_buffer_dirty(tmp, 1); + } + } + return tmp; } struct buffer_head * minix_bread(struct inode * inode, int block, int create) diff --git a/fs/minix/truncate.c b/fs/minix/truncate.c index a94806fdf2f5..4718e092e48a 100644 --- a/fs/minix/truncate.c +++ b/fs/minix/truncate.c @@ -32,6 +32,9 @@ * general case (size = XXX). I hope. */ +#define DATA_BUFFER_USED(bh) \ + ((bh->b_count > 1) || buffer_locked(bh)) + /* * The functions for minix V1 fs truncation. */ @@ -52,7 +55,7 @@ repeat: brelse(bh); goto repeat; } - if ((bh && bh->b_count != 1) || tmp != *p) { + if ((bh && DATA_BUFFER_USED(bh)) || tmp != *p) { retry = 1; brelse(bh); continue; @@ -103,7 +106,7 @@ repeat: brelse(bh); goto repeat; } - if ((bh && bh->b_count != 1) || tmp != *ind) { + if ((bh && DATA_BUFFER_USED(bh)) || tmp != *ind) { retry = 1; brelse(bh); continue; @@ -216,7 +219,7 @@ repeat: brelse(bh); goto repeat; } - if ((bh && bh->b_count != 1) || tmp != *p) { + if ((bh && DATA_BUFFER_USED(bh)) || tmp != *p) { retry = 1; brelse(bh); continue; @@ -267,7 +270,7 @@ repeat: brelse(bh); goto repeat; } - if ((bh && bh->b_count != 1) || tmp != *ind) { + if ((bh && DATA_BUFFER_USED(bh)) || tmp != *ind) { retry = 1; brelse(bh); continue; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f137542feb54..c64a02229672 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -118,6 +118,61 @@ struct nfs_cookie_table { }; static kmem_cache_t *nfs_cookie_cachep; +/* This whole scheme relies on the fact that dirent cookies + * are monotonically increasing. + * + * Another invariant is that once we have a valid non-zero + * EOF marker cached, we also have the complete set of cookie + * table entries. + * + * We return the page offset assosciated with the page where + * cookie must be if it exists at all, however if we can not + * figure that out conclusively, we return < 0. + */ +static long __nfs_readdir_offset(struct inode *inode, __u32 cookie) +{ + struct nfs_cookie_table *p; + unsigned long ret = 0; + + for(p = NFS_COOKIES(inode); p != NULL; p = p->next) { + int i; + + for (i = 0; i < COOKIES_PER_CHUNK; i++) { + __u32 this_cookie = p->cookies[i]; + + /* End of known cookies, EOF is our only hope. */ + if (!this_cookie) + goto check_eof; + + /* Next cookie is larger, must be in previous page. */ + if (this_cookie > cookie) + return ret; + + ret += 1; + + /* Exact cookie match, it must be in this page :-) */ + if (this_cookie == cookie) + return ret; + } + } +check_eof: + if (NFS_DIREOF(inode) != 0) + return ret; + + return -1L; +} + +static __inline__ long nfs_readdir_offset(struct inode *inode, __u32 cookie) +{ + /* Cookie zero is always at page offset zero. Optimize the + * other common case since most directories fit entirely + * in one page. + */ + if (!cookie || (!NFS_COOKIES(inode) && NFS_DIREOF(inode))) + return 0; + return __nfs_readdir_offset(inode, cookie); +} + /* Since a cookie of zero is declared special by the NFS * protocol, we easily can tell if a cookie in an existing * table chunk is valid or not. @@ -148,38 +203,7 @@ static __inline__ __u32 *find_cookie(struct inode *inode, unsigned long off) return ret; } -/* Now we cache directories properly, by stuffing the dirent - * data directly in the page cache. - * - * Inode invalidation due to refresh etc. takes care of - * _everything_, no sloppy entry flushing logic, no extraneous - * copying, network direct to page cache, the way it was meant - * to be. - * - * NOTE: Dirent information verification is done always by the - * page-in of the RPC reply, nowhere else, this simplies - * things substantially. - */ #define NFS_NAMELEN_ALIGN(__len) ((((__len)+3)>>2)<<2) -static u32 find_midpoint(__u32 *p, u32 doff) -{ - u32 walk = doff & PAGE_MASK; - - while(*p++ != 0) { - __u32 skip; - - p++; /* skip fileid */ - - /* Skip len, name, and cookie. */ - skip = NFS_NAMELEN_ALIGN(*p++); - p += (skip >> 2) + 1; - walk += skip + (4 * sizeof(__u32)); - if (walk >= doff) - break; - } - return walk; -} - static int create_cookie(__u32 cookie, unsigned long off, struct inode *inode) { struct nfs_cookie_table **cpp; @@ -211,28 +235,37 @@ static int create_cookie(__u32 cookie, unsigned long off, struct inode *inode) return 0; } -static struct page *try_to_get_dirent_page(struct file *, unsigned long, int); +static struct page *try_to_get_dirent_page(struct file *, __u32, int); /* Recover from a revalidation flush. The case here is that * the inode for the directory got invalidated somehow, and * all of our cached information is lost. In order to get * a correct cookie for the current readdir request from the * user, we must (re-)fetch older readdir page cache entries. + * + * Returns < 0 if some error occurrs, else it is the page offset + * to fetch. */ -static int refetch_to_readdir_off(struct file *file, struct inode *inode, u32 off) +static long refetch_to_readdir_cookie(struct file *file, struct inode *inode) { struct page *page; - u32 cur_off, goal_off = off & PAGE_MASK; + u32 goal_cookie = file->f_pos; + long cur_off, ret = -1L; again: cur_off = 0; - while (cur_off < goal_off) { + for (;;) { page = find_get_page(inode, cur_off); if (page) { if (!Page_Uptodate(page)) goto out_error; } else { - page = try_to_get_dirent_page(file, cur_off, 0); + __u32 *cp = find_cookie(inode, cur_off); + + if (!cp) + goto out_error; + + page = try_to_get_dirent_page(file, *cp, 0); if (!page) { if (!cur_off) goto out_error; @@ -243,17 +276,33 @@ again: } page_cache_release(page); - cur_off += PAGE_SIZE; + if ((ret = nfs_readdir_offset(inode, goal_cookie)) >= 0) + goto out; + + cur_off += 1; } - return 0; +out: + return ret; out_error: if (page) page_cache_release(page); - return -1; + goto out; } -static struct page *try_to_get_dirent_page(struct file *file, unsigned long offset, int refetch_ok) +/* Now we cache directories properly, by stuffing the dirent + * data directly in the page cache. + * + * Inode invalidation due to refresh etc. takes care of + * _everything_, no sloppy entry flushing logic, no extraneous + * copying, network direct to page cache, the way it was meant + * to be. + * + * NOTE: Dirent information verification is done always by the + * page-in of the RPC reply, nowhere else, this simplies + * things substantially. + */ +static struct page *try_to_get_dirent_page(struct file *file, __u32 cookie, int refetch_ok) { struct nfs_readdirargs rd_args; struct nfs_readdirres rd_res; @@ -261,6 +310,7 @@ static struct page *try_to_get_dirent_page(struct file *file, unsigned long offs struct inode *inode = dentry->d_inode; struct page *page, **hash; unsigned long page_cache; + long offset; __u32 *cookiep; page = NULL; @@ -268,10 +318,19 @@ static struct page *try_to_get_dirent_page(struct file *file, unsigned long offs if (!page_cache) goto out; - while ((cookiep = find_cookie(inode, offset)) == NULL) { + if ((offset = nfs_readdir_offset(inode, cookie)) < 0) { if (!refetch_ok || - refetch_to_readdir_off(file, inode, file->f_pos)) + (offset = refetch_to_readdir_cookie(file, inode)) < 0) { + page_cache_free(page_cache); goto out; + } + } + + cookiep = find_cookie(inode, offset); + if (!cookiep) { + /* Gross fatal error. */ + page_cache_free(page_cache); + goto out; } hash = page_hash(inode, offset); @@ -302,8 +361,7 @@ repeat: } while(rd_res.bufsiz > 0); if (rd_res.bufsiz < 0) - NFS_DIREOF(inode) = - (offset << PAGE_CACHE_SHIFT) + -(rd_res.bufsiz); + NFS_DIREOF(inode) = rd_res.cookie; else if (create_cookie(rd_res.cookie, offset, inode)) goto error; @@ -318,31 +376,35 @@ error: goto unlock_out; } -static __inline__ u32 nfs_do_filldir(__u32 *p, u32 doff, +/* Seek up to dirent assosciated with the passed in cookie, + * then fill in dirents found. Return the last cookie + * actually given to the user, to update the file position. + */ +static __inline__ u32 nfs_do_filldir(__u32 *p, u32 cookie, void *dirent, filldir_t filldir) { u32 end; - if (doff & ~PAGE_CACHE_MASK) { - doff = find_midpoint(p, doff); - p += (doff & ~PAGE_CACHE_MASK) >> 2; - } while((end = *p++) != 0) { - __u32 fileid = *p++; - __u32 len = *p++; - __u32 skip = NFS_NAMELEN_ALIGN(len); - char *name = (char *) p; - - /* Skip the cookie. */ - p = ((__u32 *) (name + skip)) + 1; - if (filldir(dirent, name, len, doff, fileid) < 0) - goto out; - doff += (skip + (4 * sizeof(__u32))); + __u32 fileid, len, skip, this_cookie; + char *name; + + fileid = *p++; + len = *p++; + name = (char *) p; + skip = NFS_NAMELEN_ALIGN(len); + p += (skip >> 2); + this_cookie = *p++; + + if (this_cookie < cookie) + continue; + + cookie = this_cookie; + if (filldir(dirent, name, len, cookie, fileid) < 0) + break; } - if (!*p) - doff = PAGE_CACHE_ALIGN(doff); -out: - return doff; + + return cookie; } /* The file offset position is represented in pure bytes, to @@ -357,7 +419,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct dentry *dentry = filp->f_dentry; struct inode *inode = dentry->d_inode; struct page *page, **hash; - unsigned long offset; + long offset; int res; res = nfs_revalidate_inode(NFS_DSERVER(dentry), dentry); @@ -367,7 +429,9 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (NFS_DIREOF(inode) && filp->f_pos >= NFS_DIREOF(inode)) return 0; - offset = filp->f_pos >> PAGE_CACHE_SHIFT; + if ((offset = nfs_readdir_offset(inode, filp->f_pos)) < 0) + goto no_dirent_page; + hash = page_hash(inode, offset); page = __find_get_page(inode, offset, *hash); if (!page) @@ -381,7 +445,7 @@ success: return 0; no_dirent_page: - page = try_to_get_dirent_page(filp, offset, 1); + page = try_to_get_dirent_page(filp, filp->f_pos, 1); if (!page) goto no_page; @@ -393,20 +457,39 @@ no_page: return -EIO; } -/* Invalidate directory cookie caches and EOF marker - * for an inode. +/* Flush directory cookie and EOF caches for an inode. + * So we don't thrash allocating/freeing cookie tables, + * we keep the cookies around until the inode is + * deleted/reused. + */ +__inline__ void nfs_flush_dircache(struct inode *inode) +{ + struct nfs_cookie_table *p = NFS_COOKIES(inode); + + while (p != NULL) { + int i; + + for(i = 0; i < COOKIES_PER_CHUNK; i++) + p->cookies[i] = 0; + + p = p->next; + } + NFS_DIREOF(inode) = 0; +} + +/* Free up directory cache state, this happens when + * nfs_delete_inode is called on an NFS directory. */ -__inline__ void nfs_invalidate_dircache(struct inode *inode) +void nfs_free_dircache(struct inode *inode) { struct nfs_cookie_table *p = NFS_COOKIES(inode); - if (p != NULL) { - NFS_COOKIES(inode) = NULL; - do { struct nfs_cookie_table *next = p->next; - kmem_cache_free(nfs_cookie_cachep, p); - p = next; - } while (p != NULL); + while (p != NULL) { + struct nfs_cookie_table *next = p->next; + kmem_cache_free(nfs_cookie_cachep, p); + p = next; } + NFS_COOKIES(inode) = NULL; NFS_DIREOF(inode) = 0; } @@ -532,11 +615,11 @@ out_bad: /* Purge readdir caches. */ if (dentry->d_parent->d_inode) { invalidate_inode_pages(dentry->d_parent->d_inode); - nfs_invalidate_dircache(dentry->d_parent->d_inode); + nfs_flush_dircache(dentry->d_parent->d_inode); } if (inode && S_ISDIR(inode->i_mode)) { invalidate_inode_pages(inode); - nfs_invalidate_dircache(inode); + nfs_flush_dircache(inode); } return 0; } @@ -733,7 +816,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode) * Invalidate the dir cache before the operation to avoid a race. */ invalidate_inode_pages(dir); - nfs_invalidate_dircache(dir); + nfs_flush_dircache(dir); error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dentry->d_parent), dentry->d_name.name, &sattr, &fhandle, &fattr); if (!error) @@ -763,7 +846,7 @@ static int nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rde sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1; invalidate_inode_pages(dir); - nfs_invalidate_dircache(dir); + nfs_flush_dircache(dir); error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dentry->d_parent), dentry->d_name.name, &sattr, &fhandle, &fattr); if (!error) @@ -798,7 +881,7 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) */ d_drop(dentry); invalidate_inode_pages(dir); - nfs_invalidate_dircache(dir); + nfs_flush_dircache(dir); error = nfs_proc_mkdir(NFS_DSERVER(dentry), NFS_FH(dentry->d_parent), dentry->d_name.name, &sattr, &fhandle, &fattr); return error; @@ -819,7 +902,7 @@ dentry->d_inode->i_count, dentry->d_inode->i_nlink); #endif invalidate_inode_pages(dir); - nfs_invalidate_dircache(dir); + nfs_flush_dircache(dir); error = nfs_proc_rmdir(NFS_SERVER(dir), NFS_FH(dentry->d_parent), dentry->d_name.name); @@ -947,7 +1030,7 @@ dentry->d_parent->d_name.name, dentry->d_name.name); } while(sdentry->d_inode != NULL); /* need negative lookup */ invalidate_inode_pages(dir); - nfs_invalidate_dircache(dir); + nfs_flush_dircache(dir); error = nfs_proc_rename(NFS_SERVER(dir), NFS_FH(dentry->d_parent), dentry->d_name.name, NFS_FH(dentry->d_parent), silly); @@ -1017,7 +1100,7 @@ inode->i_count, inode->i_nlink); d_delete(dentry); } invalidate_inode_pages(dir); - nfs_invalidate_dircache(dir); + nfs_flush_dircache(dir); error = nfs_proc_remove(NFS_SERVER(dir), NFS_FH(dentry->d_parent), dentry->d_name.name); /* @@ -1084,7 +1167,7 @@ dentry->d_parent->d_name.name, dentry->d_name.name); */ d_drop(dentry); invalidate_inode_pages(dir); - nfs_invalidate_dircache(dir); + nfs_flush_dircache(dir); error = nfs_proc_symlink(NFS_SERVER(dir), NFS_FH(dentry->d_parent), dentry->d_name.name, symname, &sattr); if (!error) { @@ -1115,7 +1198,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) */ d_drop(dentry); invalidate_inode_pages(dir); - nfs_invalidate_dircache(dir); + nfs_flush_dircache(dir); error = nfs_proc_link(NFS_DSERVER(old_dentry), NFS_FH(old_dentry), NFS_FH(dentry->d_parent), dentry->d_name.name); if (!error) { @@ -1261,9 +1344,9 @@ new_inode->i_count, new_inode->i_nlink); } invalidate_inode_pages(new_dir); - nfs_invalidate_dircache(new_dir); + nfs_flush_dircache(new_dir); invalidate_inode_pages(old_dir); - nfs_invalidate_dircache(old_dir); + nfs_flush_dircache(old_dir); error = nfs_proc_rename(NFS_DSERVER(old_dentry), NFS_FH(old_dentry->d_parent), old_dentry->d_name.name, NFS_FH(new_dentry->d_parent), new_dentry->d_name.name); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c7e684763f2a..5421cebf99a8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -99,23 +99,28 @@ nfs_delete_inode(struct inode * inode) int failed; dprintk("NFS: delete_inode(%x/%ld)\n", inode->i_dev, inode->i_ino); - /* - * Flush out any pending write requests ... - */ - if (NFS_WRITEBACK(inode) != NULL) { - unsigned long timeout = jiffies + 5*HZ; + + if (S_ISDIR(inode->i_mode)) { + nfs_free_dircache(inode); + } else { + /* + * Flush out any pending write requests ... + */ + if (NFS_WRITEBACK(inode) != NULL) { + unsigned long timeout = jiffies + 5*HZ; #ifdef NFS_DEBUG_VERBOSE printk("nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino); #endif - nfs_inval(inode); - while (NFS_WRITEBACK(inode) != NULL && - time_before(jiffies, timeout)) { - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(HZ/10); + nfs_inval(inode); + while (NFS_WRITEBACK(inode) != NULL && + time_before(jiffies, timeout)) { + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(HZ/10); + } + current->state = TASK_RUNNING; + if (NFS_WRITEBACK(inode) != NULL) + printk("NFS: Arghhh, stuck RPC requests!\n"); } - current->state = TASK_RUNNING; - if (NFS_WRITEBACK(inode) != NULL) - printk("NFS: Arghhh, stuck RPC requests!\n"); } failed = nfs_check_failed_request(inode); @@ -433,7 +438,7 @@ nfs_zap_caches(struct inode *inode) invalidate_inode_pages(inode); if (S_ISDIR(inode->i_mode)) - nfs_invalidate_dircache(inode); + nfs_flush_dircache(inode); } /* @@ -477,8 +482,6 @@ nfs_fill_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_size = fattr->size; inode->i_mtime = fattr->mtime.seconds; NFS_OLDMTIME(inode) = fattr->mtime.seconds; - NFS_COOKIES(inode) = NULL; - NFS_WRITEBACK(inode) = NULL; } nfs_refresh_inode(inode, fattr); } diff --git a/fs/pipe.c b/fs/pipe.c index 90b5df368d4f..dd4f6cd19665 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -102,9 +102,7 @@ static ssize_t pipe_write(struct file * filp, const char * buf, free = count; else free = 1; /* can't do it atomically, wait for any free space */ - up(&inode->i_sem); - if (down_interruptible(&inode->i_atomic_write)) { - down(&inode->i_sem); + if (down_interruptible(&inode->i_sem)) { return -ERESTARTSYS; } while (count>0) { @@ -145,8 +143,7 @@ static ssize_t pipe_write(struct file * filp, const char * buf, inode->i_ctime = inode->i_mtime = CURRENT_TIME; mark_inode_dirty(inode); errout: - up(&inode->i_atomic_write); - down(&inode->i_sem); + up(&inode->i_sem); return written ? written : err; } @@ -254,6 +251,7 @@ static int pipe_release(struct inode * inode) inode->i_pipe = NULL; free_page((unsigned long) info->base); kfree(info); + return 0; } wake_up_interruptible(&PIPE_WAIT(*inode)); return 0; diff --git a/fs/read_write.c b/fs/read_write.c index 7b9bf0bf7593..c7ea90a69d75 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -166,9 +166,7 @@ asmlinkage ssize_t sys_write(unsigned int fd, const char * buf, size_t count) if (!file->f_op || !(write = file->f_op->write)) goto out; - down(&inode->i_sem); ret = write(file, buf, count, &file->f_pos); - up(&inode->i_sem); out: fput(file); bad_file: @@ -304,9 +302,7 @@ asmlinkage ssize_t sys_writev(unsigned long fd, const struct iovec * vector, if (!file) goto bad_file; if (file->f_op && file->f_op->write && (file->f_mode & FMODE_WRITE)) { - down(&file->f_dentry->d_inode->i_sem); ret = do_readv_writev(VERIFY_READ, file, vector, count); - up(&file->f_dentry->d_inode->i_sem); } fput(file); @@ -376,10 +372,7 @@ asmlinkage ssize_t sys_pwrite(unsigned int fd, const char * buf, if (pos < 0) goto out; - down(&file->f_dentry->d_inode->i_sem); ret = write(file, buf, count, &pos); - up(&file->f_dentry->d_inode->i_sem); - out: fput(file); bad_file: diff --git a/fs/sysv/file.c b/fs/sysv/file.c index d60be8fa5ebf..9e806e4d1375 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -33,7 +33,51 @@ #include #include -static ssize_t sysv_file_write(struct file *, const char *, size_t, loff_t *); +static int sysv_writepage (struct file * file, struct page * page) +{ + struct dentry *dentry = file->f_dentry; + struct inode *inode = dentry->d_inode; + unsigned long block; + int *p, nr[PAGE_SIZE/512]; + int i, err, created; + struct buffer_head *bh; + + i = PAGE_SIZE >> inode->i_sb->sv_block_size_bits; + block = page->offset >> inode->i_sb->sv_block_size_bits; + p = nr; + bh = page->buffers; + do { + if (bh && bh->b_blocknr) + *p = bh->b_blocknr; + else + *p = sysv_getblk_block (inode, block, 1, &err, &created); + if (!*p) + return -EIO; + i--; + block++; + p++; + if (bh) + bh = bh->b_this_page; + } while (i > 0); + + /* IO start */ + brw_page(WRITE, page, inode->i_dev, nr, inode->i_sb->sv_block_size, 1); + return 0; +} + +static long sysv_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf) +{ + return block_write_one_page(file, page, offset, bytes, buf, sysv_getblk_block); +} + +/* + * Write to a file (through the page cache). + */ +static ssize_t +sysv_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) +{ + return generic_file_write(file, buf, count, ppos, sysv_write_one_page); +} /* * We have mostly NULLs here: the current defaults are OK for @@ -41,7 +85,7 @@ static ssize_t sysv_file_write(struct file *, const char *, size_t, loff_t *); */ static struct file_operations sysv_file_operations = { NULL, /* lseek - default */ - sysv_file_read, /* read */ + generic_file_read, /* read */ sysv_file_write, /* write */ NULL, /* readdir - bad */ NULL, /* poll - default */ @@ -50,7 +94,10 @@ static struct file_operations sysv_file_operations = { NULL, /* no special open is needed */ NULL, /* flush */ NULL, /* release */ - sysv_sync_file /* fsync */ + sysv_sync_file, /* fsync */ + NULL, /* fasync */ + NULL, /* check_media_change */ + NULL /* revalidate */ }; struct inode_operations sysv_file_inode_operations = { @@ -67,208 +114,12 @@ struct inode_operations sysv_file_inode_operations = { NULL, /* readlink */ NULL, /* follow_link */ generic_readpage, /* readpage */ - NULL, /* writepage */ + sysv_writepage, /* writepage */ sysv_bmap, /* bmap */ sysv_truncate, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL, /* updatepage */ + NULL, /* revalidate */ + generic_block_flushpage,/* flushpage */ }; - -ssize_t sysv_file_read(struct file * filp, char * buf, - size_t count, loff_t *ppos) -{ - struct inode * inode = filp->f_dentry->d_inode; - struct super_block * sb = inode->i_sb; - ssize_t read,left,chars; - size_t block; - ssize_t blocks, offset; - int bhrequest, uptodate; - struct buffer_head ** bhb, ** bhe; - struct buffer_head * bhreq[NBUF]; - struct buffer_head * buflist[NBUF]; - size_t size; - - if (!inode) { - printk("sysv_file_read: inode = NULL\n"); - return -EINVAL; - } - if (!S_ISREG(inode->i_mode)) { - printk("sysv_file_read: mode = %07o\n",inode->i_mode); - return -EINVAL; - } - offset = *ppos; - size = inode->i_size; - if (offset > size) - left = 0; - else - left = size - offset; - if (left > count) - left = count; - if (left <= 0) - return 0; - read = 0; - block = offset >> sb->sv_block_size_bits; - offset &= sb->sv_block_size_1; - size = (size + sb->sv_block_size_1) >> sb->sv_block_size_bits; - blocks = (left + offset + sb->sv_block_size_1) >> sb->sv_block_size_bits; - bhb = bhe = buflist; - if (filp->f_reada) { - blocks += read_ahead[MAJOR(inode->i_dev)] >> (sb->sv_block_size_bits - 9); - if (block + blocks > size) - blocks = size - block; - } - - /* We do this in a two stage process. We first try to request - as many blocks as we can, then we wait for the first one to - complete, and then we try to wrap up as many as are actually - done. This routine is rather generic, in that it can be used - in a filesystem by substituting the appropriate function in - for getblk. - - This routine is optimized to make maximum use of the various - buffers and caches. - */ - - do { - bhrequest = 0; - uptodate = 1; - while (blocks) { - --blocks; - *bhb = sysv_getblk(inode, block++, 0); - if (*bhb && !buffer_uptodate(*bhb)) { - uptodate = 0; - bhreq[bhrequest++] = *bhb; - } - - if (++bhb == &buflist[NBUF]) - bhb = buflist; - - /* If the block we have on hand is uptodate, go ahead - and complete processing. */ - if (uptodate) - break; - if (bhb == bhe) - break; - } - - /* Now request them all */ - if (bhrequest) - ll_rw_block(READ, bhrequest, bhreq); - - do { /* Finish off all I/O that has actually completed */ - if (*bhe) { - wait_on_buffer(*bhe); - if (!buffer_uptodate(*bhe)) { /* read error? */ - brelse(*bhe); - if (++bhe == &buflist[NBUF]) - bhe = buflist; - left = 0; - break; - } - } - if (left < sb->sv_block_size - offset) - chars = left; - else - chars = sb->sv_block_size - offset; - *ppos += chars; - left -= chars; - read += chars; - if (*bhe) { - copy_to_user(buf,offset+(*bhe)->b_data,chars); - brelse(*bhe); - buf += chars; - } else { - while (chars-- > 0) - put_user(0,buf++); - } - offset = 0; - if (++bhe == &buflist[NBUF]) - bhe = buflist; - } while (left > 0 && bhe != bhb && (!*bhe || !buffer_locked(*bhe))); - } while (left > 0); - -/* Release the read-ahead blocks */ - while (bhe != bhb) { - brelse(*bhe); - if (++bhe == &buflist[NBUF]) - bhe = buflist; - }; - if (!read) - return -EIO; - filp->f_reada = 1; - if (!IS_RDONLY(inode)) { - inode->i_atime = CURRENT_TIME; - mark_inode_dirty(inode); - } - return read; -} - -static ssize_t sysv_file_write(struct file * filp, const char * buf, - size_t count, loff_t *ppos) -{ - struct inode * inode = filp->f_dentry->d_inode; - struct super_block * sb = inode->i_sb; - off_t pos; - ssize_t written, c; - struct buffer_head * bh; - char * p; - - if (!inode) { - printk("sysv_file_write: inode = NULL\n"); - return -EINVAL; - } - if (!S_ISREG(inode->i_mode)) { - printk("sysv_file_write: mode = %07o\n",inode->i_mode); - return -EINVAL; - } -/* - * OK, append may not work when many processes are writing at the same time - * but so what. That way leads to madness anyway. - * But we need to protect against simultaneous truncate as we may end up - * writing our data into blocks that have meanwhile been incorporated into - * the freelist, thereby trashing the freelist. - */ - if (filp->f_flags & O_APPEND) - pos = inode->i_size; - else - pos = *ppos; - written = 0; - while (written> sb->sv_block_size_bits, 1); - if (!bh) { - if (!written) - written = -ENOSPC; - break; - } - c = sb->sv_block_size - (pos & sb->sv_block_size_1); - if (c > count-written) - c = count-written; - if (c != sb->sv_block_size && !buffer_uptodate(bh)) { - ll_rw_block(READ, 1, &bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - brelse(bh); - if (!written) - written = -EIO; - break; - } - } - /* now either c==sb->sv_block_size or buffer_uptodate(bh) */ - p = (pos & sb->sv_block_size_1) + bh->b_data; - copy_from_user(p, buf, c); - update_vm_cache(inode, pos, p, c); - pos += c; - if (pos > inode->i_size) { - inode->i_size = pos; - mark_inode_dirty(inode); - } - written += c; - buf += c; - mark_buffer_uptodate(bh, 1); - mark_buffer_dirty(bh, 0); - brelse(bh); - } - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - *ppos = pos; - mark_inode_dirty(inode); - return written; -} diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index f8d508c3de5a..d335b5b501ec 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -657,7 +657,8 @@ int sysv_bmap(struct inode * inode,int block_nr) /* Access selected blocks of regular files (or directories) */ -static struct buffer_head * inode_getblk(struct inode * inode, int nr, int create) +static struct buffer_head * inode_getblk(struct inode * inode, int nr, int create, + int metadata, int *phys_block, int *created) { struct super_block *sb; u32 tmp; @@ -669,31 +670,48 @@ static struct buffer_head * inode_getblk(struct inode * inode, int nr, int creat repeat: tmp = *p; if (tmp) { - result = sv_getblk(sb, inode->i_dev, tmp); - if (tmp == *p) - return result; - brelse(result); - goto repeat; + if (metadata) { + result = sv_getblk(sb, inode->i_dev, tmp); + if (tmp == *p) + return result; + brelse(result); + goto repeat; + } else { + *phys_block = tmp; + return NULL; + } } if (!create) return NULL; tmp = sysv_new_block(sb); if (!tmp) return NULL; - result = sv_getblk(sb, inode->i_dev, tmp); - if (*p) { - sysv_free_block(sb,tmp); - brelse(result); - goto repeat; + if (metadata) { + result = sv_getblk(sb, inode->i_dev, tmp); + if (*p) { + sysv_free_block(sb, tmp); + brelse(result); + goto repeat; + } + } else { + if (*p) { + sysv_free_block(sb, tmp); + goto repeat; + } + *phys_block = tmp; + result = NULL; + *created = 1; } *p = tmp; + inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); return result; } static struct buffer_head * block_getblk(struct inode * inode, - struct buffer_head * bh, int nr, int create) + struct buffer_head * bh, int nr, int create, + int metadata, int *phys_block, int *created) { struct super_block *sb; u32 tmp, block; @@ -717,13 +735,19 @@ repeat: if (sb->sv_convert) block = from_coh_ulong(block); if (tmp) { - result = sv_getblk(sb, bh->b_dev, block); - if (tmp == *p) { + if (metadata) { + result = sv_getblk(sb, bh->b_dev, block); + if (tmp == *p) { + brelse(bh); + return result; + } + brelse(result); + goto repeat; + } else { + *phys_block = tmp; brelse(bh); - return result; + return NULL; } - brelse(result); - goto repeat; } if (!create) { brelse(bh); @@ -734,11 +758,17 @@ repeat: brelse(bh); return NULL; } - result = sv_getblk(sb, bh->b_dev, block); - if (*p) { - sysv_free_block(sb,block); - brelse(result); - goto repeat; + if (metadata) { + result = sv_getblk(sb, bh->b_dev, block); + if (*p) { + sysv_free_block(sb,block); + brelse(result); + goto repeat; + } + } else { + *phys_block = tmp; + result = NULL; + *created = 1; } *p = (sb->sv_convert ? to_coh_ulong(block) : block); mark_buffer_dirty(bh, 1); @@ -746,37 +776,74 @@ repeat: return result; } -struct buffer_head * sysv_getblk(struct inode * inode, unsigned int block, int create) +int sysv_getblk_block(struct inode *inode, long block, int create, + int *err, int *created) { - struct super_block * sb = inode->i_sb; - struct buffer_head * bh; + struct super_block *sb = inode->i_sb; + struct buffer_head *bh, *tmp; + int phys_block; - if (block < 10) - return inode_getblk(inode,block,create); + *err = -EIO; + if (block < 0) { + printk("sysv_getblk: block<0"); + return 0; + } + if (block > sb->sv_ind_per_block_3) { + printk("sysv_getblk: block>big"); + return 0; + } + if (block < 10) { + tmp = inode_getblk(inode, block, create, + 0, &phys_block, created); + goto out; + } block -= 10; if (block < sb->sv_ind_per_block) { - bh = inode_getblk(inode,10,create); - return block_getblk(inode, bh, block, create); + bh = inode_getblk(inode, 10, create, 1, NULL, NULL); + tmp = block_getblk(inode, bh, block, create, + 0, &phys_block, created); + goto out; } block -= sb->sv_ind_per_block; if (block < sb->sv_ind_per_block_2) { - bh = inode_getblk(inode,11,create); - bh = block_getblk(inode, bh, block >> sb->sv_ind_per_block_bits, create); - return block_getblk(inode, bh, block & sb->sv_ind_per_block_1, create); + bh = inode_getblk(inode, 11, create, 1, NULL, NULL); + bh = block_getblk(inode, bh, block >> sb->sv_ind_per_block_bits, create, + 1, NULL, NULL); + tmp = block_getblk(inode, bh, block & sb->sv_ind_per_block_1, create, + 0, &phys_block, created); + goto out; } block -= sb->sv_ind_per_block_2; - if (block < sb->sv_ind_per_block_3) { - bh = inode_getblk(inode,12,create); - bh = block_getblk(inode, bh, block >> sb->sv_ind_per_block_2_bits, create); - bh = block_getblk(inode, bh, (block >> sb->sv_ind_per_block_bits) & sb->sv_ind_per_block_1, create); - return block_getblk(inode, bh, block & sb->sv_ind_per_block_1, create); - } - if ((int)block<0) { - printk("sysv_getblk: block<0"); - return NULL; + bh = inode_getblk(inode, 12, create, 1, NULL, NULL); + bh = block_getblk(inode, bh, block >> sb->sv_ind_per_block_2_bits, create, + 1, NULL, NULL); + bh = block_getblk(inode, bh, + (block >> sb->sv_ind_per_block_bits) & sb->sv_ind_per_block_1, + create, 1, NULL, NULL); + tmp = block_getblk(inode, bh, block & sb->sv_ind_per_block_1, create, + 0, &phys_block, created); + +out: + *err = 0; + return phys_block; +} + +struct buffer_head *sysv_getblk (struct inode *inode, unsigned int block, int create) +{ + struct buffer_head *tmp = NULL; + int phys_block; + int err, created; + + phys_block = sysv_getblk_block(inode, block, create, &err, &created); + if (phys_block) { + tmp = getblk(inode->i_dev, phys_block, BLOCK_SIZE); + if (created) { + memset(tmp->b_data, 0, BLOCK_SIZE); + mark_buffer_uptodate(tmp, 1); + mark_buffer_dirty(tmp, 1); + } } - printk("sysv_getblk: block>big"); - return NULL; + return tmp; } struct buffer_head * sysv_file_bread(struct inode * inode, int block, int create) diff --git a/fs/sysv/truncate.c b/fs/sysv/truncate.c index c318648a99bf..a8c0e074561c 100644 --- a/fs/sysv/truncate.c +++ b/fs/sysv/truncate.c @@ -35,6 +35,9 @@ * general case (size = XXX). I hope. */ +#define DATA_BUFFER_USED(bh) \ + ((bh->b_count > 1) || buffer_locked(bh)) + /* We throw away any data beyond inode->i_size. */ static int trunc_direct(struct inode * inode) @@ -58,7 +61,7 @@ repeat: brelse(bh); goto repeat; } - if ((bh && bh->b_count != 1) || (block != *p)) { + if ((bh && DATA_BUFFER_USED(bh)) || (block != *p)) { retry = 1; brelse(bh); continue; @@ -115,7 +118,7 @@ repeat: brelse(bh); goto repeat; } - if ((bh && bh->b_count != 1) || (tmp != *ind)) { + if ((bh && DATA_BUFFER_USED(bh)) || (tmp != *ind)) { retry = 1; brelse(bh); continue; @@ -128,7 +131,7 @@ repeat: for (i = 0; i < sb->sv_ind_per_block; i++) if (((sysv_zone_t *) indbh->b_data)[i]) goto done; - if ((indbh->b_count != 1) || (indtmp != *p)) { + if (DATA_BUFFER_USED(indbh) || (indtmp != *p)) { brelse(indbh); return 1; } @@ -185,7 +188,7 @@ static int trunc_dindirect(struct inode * inode, unsigned long offset, sysv_zone for (i = 0; i < sb->sv_ind_per_block; i++) if (((sysv_zone_t *) indbh->b_data)[i]) goto done; - if ((indbh->b_count != 1) || (indtmp != *p)) { + if (DATA_BUFFER_USED(indbh) || (indtmp != *p)) { brelse(indbh); return 1; } @@ -242,7 +245,7 @@ static int trunc_tindirect(struct inode * inode, unsigned long offset, sysv_zone for (i = 0; i < sb->sv_ind_per_block; i++) if (((sysv_zone_t *) indbh->b_data)[i]) goto done; - if ((indbh->b_count != 1) || (indtmp != *p)) { + if (DATA_BUFFER_USED(indbh) || (indtmp != *p)) { brelse(indbh); return 1; } diff --git a/fs/ufs/file.c b/fs/ufs/file.c index 7e94bfd1c8ac..57db16baefa0 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -41,52 +41,6 @@ #define MIN(a,b) (((a)<(b))?(a):(b)) #define MAX(a,b) (((a)>(b))?(a):(b)) -static long long ufs_file_lseek(struct file *, long long, int); -static ssize_t ufs_file_write (struct file *, const char *, size_t, loff_t *); -static int ufs_release_file (struct inode *, struct file *); - -/* - * We have mostly NULL's here: the current defaults are ok for - * the ufs filesystem. - */ -static struct file_operations ufs_file_operations = { - ufs_file_lseek, /* lseek */ - generic_file_read, /* read */ - ufs_file_write, /* write */ - NULL, /* readdir - bad */ - NULL, /* poll - default */ - NULL, /* ioctl */ - generic_file_mmap, /* mmap */ - NULL, /* no special open is needed */ - NULL, /* flush */ - ufs_release_file, /* release */ - NULL, /* fsync */ - NULL, /* fasync */ - NULL, /* check_media_change */ - NULL /* revalidate */ -}; - -struct inode_operations ufs_file_inode_operations = { - &ufs_file_operations,/* default file operations */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - generic_readpage, /* readpage */ - NULL, /* writepage */ - ufs_bmap, /* bmap */ - ufs_truncate, /* truncate */ - NULL, /* permission */ - NULL /* smap */ -}; - /* * Make sure the offset never goes beyond the 32-bit mark.. */ @@ -133,139 +87,49 @@ static inline void remove_suid(struct inode *inode) } } -static ssize_t ufs_file_write ( - struct file * filp, - const char * buf, - size_t count, - loff_t *ppos ) +static int ufs_writepage (struct file *file, struct page *page) { - struct inode * inode = filp->f_dentry->d_inode; - __u32 pos; - long block; - int offset; - int written, c; - struct buffer_head * bh, *bufferlist[NBUF]; - struct super_block * sb; - int err; - int i,buffercount,write_error; - - /* POSIX: mtime/ctime may not change for 0 count */ - if (!count) - return 0; - write_error = buffercount = 0; - if (!inode) - return -EINVAL; - sb = inode->i_sb; - if (sb->s_flags & MS_RDONLY) - /* - * This fs has been automatically remounted ro because of errors - */ - return -ENOSPC; - - if (!S_ISREG(inode->i_mode)) { - ufs_warning (sb, "ufs_file_write", "mode = %07o", - inode->i_mode); - return -EINVAL; - } - remove_suid(inode); - - if (filp->f_flags & O_APPEND) - pos = inode->i_size; - else { - pos = *ppos; - if (pos != *ppos) - return -EINVAL; - } - - /* Check for overflow.. */ - if (pos > (__u32) (pos + count)) { - count = ~pos; /* == 0xFFFFFFFF - pos */ - if (!count) - return -EFBIG; - } - - /* - * If a file has been opened in synchronous mode, we have to ensure - * that meta-data will also be written synchronously. Thus, we - * set the i_osync field. This field is tested by the allocation - * routines. - */ - if (filp->f_flags & O_SYNC) - inode->u.ufs_i.i_osync++; - block = pos >> sb->s_blocksize_bits; - offset = pos & (sb->s_blocksize - 1); - c = sb->s_blocksize - offset; - written = 0; + struct dentry *dentry = file->f_dentry; + struct inode *inode = dentry->d_inode; + unsigned long block; + int *p, nr[PAGE_SIZE/512]; + int i, err, created; + struct buffer_head *bh; + + i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits; + block = page->offset >> inode->i_sb->s_blocksize_bits; + p = nr; + bh = page->buffers; do { - bh = ufs_getfrag (inode, block, 1, &err); - if (!bh) { - if (!written) - written = err; - break; - } - if (c > count) - c = count; - if (c != sb->s_blocksize && !buffer_uptodate(bh)) { - ll_rw_block (READ, 1, &bh); - wait_on_buffer (bh); - if (!buffer_uptodate(bh)) { - brelse (bh); - if (!written) - written = -EIO; - break; - } - } - c -= copy_from_user (bh->b_data + offset, buf, c); - if (!c) { - brelse(bh); - if (!written) - written = -EFAULT; - break; - } - update_vm_cache(inode, pos, bh->b_data + offset, c); - pos += c; - written += c; - buf += c; - count -= c; - mark_buffer_uptodate(bh, 1); - mark_buffer_dirty(bh, 0); - if (filp->f_flags & O_SYNC) - bufferlist[buffercount++] = bh; + if (bh && bh->b_blocknr) + *p = bh->b_blocknr; else - brelse(bh); - if (buffercount == NBUF){ - ll_rw_block(WRITE, buffercount, bufferlist); - for(i=0; is_blocksize; - } while (count); - if (buffercount){ - ll_rw_block(WRITE, buffercount, bufferlist); - for (i=0; i inode->i_size) - inode->i_size = pos; - if (filp->f_flags & O_SYNC) - inode->u.ufs_i.i_osync--; - inode->i_ctime = inode->i_mtime = CURRENT_TIME; - *ppos = pos; - mark_inode_dirty(inode); - return written; + p++; + if (bh) + bh = bh->b_this_page; + } while (i > 0); + + brw_page(WRITE, page, inode->i_dev, nr, inode->i_sb->s_blocksize, 1); + return 0; +} + +static long ufs_write_one_page(struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char *buf) +{ + return block_write_one_page(file, page, offset, bytes, buf, ufs_getfrag_block); +} + +/* + * Write to a file (through the page cache). + */ +static ssize_t +ufs_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) +{ + return generic_file_write(file, buf, count, ppos, ufs_write_one_page); } /* @@ -277,3 +141,48 @@ static int ufs_release_file (struct inode * inode, struct file * filp) { return 0; } + +/* + * We have mostly NULL's here: the current defaults are ok for + * the ufs filesystem. + */ +static struct file_operations ufs_file_operations = { + ufs_file_lseek, /* lseek */ + generic_file_read, /* read */ + ufs_file_write, /* write */ + NULL, /* readdir - bad */ + NULL, /* poll - default */ + NULL, /* ioctl */ + generic_file_mmap, /* mmap */ + NULL, /* no special open is needed */ + NULL, /* flush */ + ufs_release_file, /* release */ + NULL, /* fsync */ + NULL, /* fasync */ + NULL, /* check_media_change */ + NULL /* revalidate */ +}; + +struct inode_operations ufs_file_inode_operations = { + &ufs_file_operations,/* default file operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + generic_readpage, /* readpage */ + ufs_writepage, /* writepage */ + ufs_bmap, /* bmap */ + ufs_truncate, /* truncate */ + NULL, /* permission */ + NULL, /* smap */ + NULL, /* updatepage */ + NULL, /* revalidate */ + generic_block_flushpage,/* flushpage */ +}; diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index a5a51bac5252..636b0aabd551 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -175,7 +175,7 @@ int ufs_bmap (struct inode * inode, int fragment) static struct buffer_head * ufs_inode_getfrag (struct inode * inode, unsigned fragment, unsigned new_fragment, int create, - unsigned required, int * err ) + unsigned required, int *err, int metadata, int *phys_block, int *created) { struct super_block * sb; struct ufs_sb_private_info * uspi; @@ -201,13 +201,19 @@ repeat: tmp = SWAB32(*p); lastfrag = inode->u.ufs_i.i_lastfrag; if (tmp && fragment < lastfrag) { - result = getblk (sb->s_dev, uspi->s_sbbase + tmp + blockoff, sb->s_blocksize); - if (tmp == SWAB32(*p)) { - UFSD(("EXIT, result %u\n", tmp + blockoff)) - return result; + if (metadata) { + result = getblk (sb->s_dev, uspi->s_sbbase + tmp + blockoff, + sb->s_blocksize); + if (tmp == SWAB32(*p)) { + UFSD(("EXIT, result %u\n", tmp + blockoff)) + return result; + } + brelse (result); + goto repeat; + } else { + *phys_block = tmp; + return NULL; } - brelse (result); - goto repeat; } *err = -EFBIG; if (!create) @@ -269,7 +275,20 @@ repeat: else return NULL; } - result = getblk (inode->i_dev, tmp + blockoff, sb->s_blocksize); + + /* The nullification of framgents done in ufs/balloc.c is + * something I don't have the stomache to move into here right + * now. -DaveM + */ + if (metadata) { + result = getblk (inode->i_dev, tmp + blockoff, sb->s_blocksize); + } else { + *phys_block = tmp; + result = NULL; + *err = 0; + *created = 1; + } + inode->i_ctime = CURRENT_TIME; if (IS_SYNC(inode)) ufs_sync_inode (inode); @@ -280,7 +299,7 @@ repeat: static struct buffer_head * ufs_block_getfrag (struct inode * inode, struct buffer_head * bh, unsigned fragment, unsigned new_fragment, - int create, unsigned blocksize, int * err) + int create, unsigned blocksize, int * err, int metadata, int *phys_block, int *created) { struct super_block * sb; struct ufs_sb_private_info * uspi; @@ -312,19 +331,36 @@ static struct buffer_head * ufs_block_getfrag (struct inode * inode, repeat: tmp = SWAB32(*p); if (tmp) { - result = getblk (bh->b_dev, uspi->s_sbbase + tmp + blockoff, sb->s_blocksize); - if (tmp == SWAB32(*p)) { + if (metadata) { + result = getblk (bh->b_dev, uspi->s_sbbase + tmp + blockoff, + sb->s_blocksize); + if (tmp == SWAB32(*p)) { + brelse (bh); + UFSD(("EXIT, result %u\n", tmp + blockoff)) + return result; + } + brelse (result); + goto repeat; + } else { + *phys_block = tmp; brelse (bh); - UFSD(("EXIT, result %u\n", tmp + blockoff)) - return result; + return NULL; } - brelse (result); - goto repeat; } - if (!create || new_fragment >= (current->rlim[RLIMIT_FSIZE].rlim_cur >> sb->s_blocksize)) { + *err = -EFBIG; + if (!create) { brelse (bh); - *err = -EFBIG; return NULL; + } else { + unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur; + if (limit < RLIM_INFINITY) { + limit >>= sb->s_blocksize_bits; + if (new_fragment >= limit) { + brelse (bh); + send_sig(SIGXFSZ, current, 0); + return NULL; + } + } } if (block && (tmp = SWAB32(((u32*)bh->b_data)[block-1]) + uspi->s_fpb)) goal = tmp + uspi->s_fpb; @@ -334,12 +370,25 @@ repeat: if (!tmp) { if (SWAB32(*p)) { goto repeat; - } - else { + } else { + brelse (bh); return NULL; } } - result = getblk (bh->b_dev, tmp + blockoff, sb->s_blocksize); + + /* The nullification of framgents done in ufs/balloc.c is + * something I don't have the stomache to move into here right + * now. -DaveM + */ + if (metadata) { + result = getblk (bh->b_dev, tmp + blockoff, sb->s_blocksize); + } else { + *phys_block = tmp; + result = NULL; + *err = 0; + *created = 1; + } + mark_buffer_dirty(bh, 1); if (IS_SYNC(inode)) { ll_rw_block (WRITE, 1, &bh); @@ -352,14 +401,15 @@ repeat: return result; } -struct buffer_head * ufs_getfrag (struct inode * inode, unsigned fragment, - int create, int * err) +int ufs_getfrag_block (struct inode * inode, long fragment, + int create, int * err, int *created) { struct super_block * sb; struct ufs_sb_private_info * uspi; - struct buffer_head * bh; + struct buffer_head * bh, * tmp; unsigned f; unsigned swab; + int phys_block; sb = inode->i_sb; uspi = sb->u.ufs_sb.s_uspi; @@ -367,19 +417,27 @@ struct buffer_head * ufs_getfrag (struct inode * inode, unsigned fragment, *err = -EIO; UFSD(("ENTER, ino %lu, fragment %u\n", inode->i_ino, fragment)) + if (fragment < 0) { + ufs_warning (sb, "ufs_getblk", "block < 0"); + return 0; + } if (fragment > ((UFS_NDADDR + uspi->s_apb + uspi->s_2apb + uspi->s_3apb) << uspi->s_fpbshift)) { ufs_warning (sb, "ufs_getblk", "block > big"); - return NULL; + return 0; } *err = -ENOSPC; f = fragment; + *created = 0; /* * Direct fragment */ - if (fragment < UFS_NDIR_FRAGMENT) - return ufs_inode_getfrag (inode, fragment, fragment, create, 1, err); + if (fragment < UFS_NDIR_FRAGMENT) { + tmp = ufs_inode_getfrag (inode, fragment, fragment, create, 1, + err, 0, &phys_block, created); + goto out; + } /* * Indirect fragment */ @@ -387,10 +445,12 @@ struct buffer_head * ufs_getfrag (struct inode * inode, unsigned fragment, if (fragment < (1 << (uspi->s_apbshift + uspi->s_fpbshift))) { bh = ufs_inode_getfrag (inode, UFS_IND_FRAGMENT + (fragment >> uspi->s_apbshift), - f, create, uspi->s_fpb, err); - return ufs_block_getfrag (inode, bh, - fragment & uspi->s_apbmask, - f, create, sb->s_blocksize, err); + f, create, uspi->s_fpb, err, 1, NULL, NULL); + tmp = ufs_block_getfrag (inode, bh, + fragment & uspi->s_apbmask, + f, create, sb->s_blocksize, + err, 0, &phys_block, created); + goto out; } /* * Dindirect fragment @@ -398,14 +458,18 @@ struct buffer_head * ufs_getfrag (struct inode * inode, unsigned fragment, fragment -= 1 << (uspi->s_apbshift + uspi->s_fpbshift); if ( fragment < (1 << (uspi->s_2apbshift + uspi->s_fpbshift))) { bh = ufs_inode_getfrag (inode, - UFS_DIND_FRAGMENT + (fragment >> uspi->s_2apbshift), - f, create, uspi->s_fpb, err); + UFS_DIND_FRAGMENT + (fragment >> uspi->s_2apbshift), + f, create, uspi->s_fpb, err, + 1, NULL, NULL); bh = ufs_block_getfrag (inode, bh, (fragment >> uspi->s_apbshift) & uspi->s_apbmask, - f, create, sb->s_blocksize, err); - return ufs_block_getfrag (inode, bh, + f, create, sb->s_blocksize, err, + 1, NULL, NULL); + tmp = ufs_block_getfrag (inode, bh, fragment & uspi->s_apbmask, - f, create, sb->s_blocksize, err); + f, create, sb->s_blocksize, err, + 0, &phys_block, created); + goto out; } /* * Tindirect fragment @@ -413,19 +477,42 @@ struct buffer_head * ufs_getfrag (struct inode * inode, unsigned fragment, fragment -= 1 << (uspi->s_2apbshift + uspi->s_fpbshift); bh = ufs_inode_getfrag (inode, UFS_TIND_FRAGMENT + (fragment >> uspi->s_3apbshift), - f, create, uspi->s_fpb, err); + f, create, uspi->s_fpb, err, 1, NULL, NULL); bh = ufs_block_getfrag (inode, bh, (fragment >> uspi->s_2apbshift) & uspi->s_apbmask, - f, create, sb->s_blocksize, err); + f, create, sb->s_blocksize, err, 1, NULL, NULL); bh = ufs_block_getfrag (inode, bh, (fragment >> uspi->s_apbshift) & uspi->s_apbmask, - f, create, sb->s_blocksize, err); - return ufs_block_getfrag (inode, bh, + f, create, sb->s_blocksize, err, 1, NULL, NULL); + tmp = ufs_block_getfrag (inode, bh, fragment & uspi->s_apbmask, - f, create, sb->s_blocksize, err); -} + f, create, sb->s_blocksize, err, 0, &phys_block, created); +out: + if (!phys_block) + return 0; + if (*err) + return 0; + return phys_block; +} +struct buffer_head *ufs_getfrag(struct inode *inode, unsigned int fragment, + int create, int *err) +{ + struct buffer_head *tmp = NULL; + int phys_block, created; + + phys_block = ufs_getfrag_block(inode, fragment, create, err, &created); + if (phys_block) { + tmp = getblk(inode->i_dev, phys_block, inode->i_sb->s_blocksize); + if (created) { + memset(tmp->b_data, 0, inode->i_sb->s_blocksize); + mark_buffer_uptodate(tmp, 1); + mark_buffer_dirty(tmp, 1); + } + } + return tmp; +} struct buffer_head * ufs_bread (struct inode * inode, unsigned fragment, int create, int * err) diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 3fec735a288d..4649a42536f8 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -62,6 +62,9 @@ #define DIRECT_BLOCK howmany (inode->i_size, uspi->s_bsize) #define DIRECT_FRAGMENT howmany (inode->i_size, uspi->s_fsize) +#define DATA_BUFFER_USED(bh) \ + ((bh->b_count > 1) || buffer_locked(bh)) + static int ufs_trunc_direct (struct inode * inode) { struct super_block * sb; @@ -114,7 +117,7 @@ static int ufs_trunc_direct (struct inode * inode) frag2 = ufs_fragnum (frag2); for (j = frag1; j < frag2; j++) { bh = get_hash_table (sb->s_dev, tmp + j, uspi->s_fsize); - if ((bh && bh->b_count != 1) || tmp != SWAB32(*p)) { + if ((bh && DATA_BUFFER_USED(bh)) || tmp != SWAB32(*p)) { retry = 1; brelse (bh); goto next1; @@ -137,7 +140,7 @@ next1: continue; for (j = 0; j < uspi->s_fpb; j++) { bh = get_hash_table (sb->s_dev, tmp + j, uspi->s_fsize); - if ((bh && bh->b_count != 1) || tmp != SWAB32(*p)) { + if ((bh && DATA_BUFFER_USED(bh)) || tmp != SWAB32(*p)) { retry = 1; brelse (bh); goto next2; @@ -176,7 +179,7 @@ next2: frag4 = ufs_fragnum (frag4); for (j = 0; j < frag4; j++) { bh = get_hash_table (sb->s_dev, tmp + j, uspi->s_fsize); - if ((bh && bh->b_count != 1) || tmp != SWAB32(*p)) { + if ((bh && DATA_BUFFER_USED(bh)) || tmp != SWAB32(*p)) { retry = 1; brelse (bh); goto next1; @@ -237,7 +240,7 @@ static int ufs_trunc_indirect (struct inode * inode, unsigned offset, u32 * p) continue; for (j = 0; j < uspi->s_fpb; j++) { bh = get_hash_table (sb->s_dev, tmp + j, uspi->s_fsize); - if ((bh && bh->b_count != 1) || tmp != SWAB32(*ind)) { + if ((bh && DATA_BUFFER_USED(bh)) || tmp != SWAB32(*ind)) { retry = 1; brelse (bh); goto next; diff --git a/include/linux/fs.h b/include/linux/fs.h index 0beb57019d24..de4143c48207 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -356,7 +356,6 @@ struct inode { unsigned long i_version; unsigned long i_nrpages; struct semaphore i_sem; - struct semaphore i_atomic_write; struct inode_operations *i_op; struct super_block *i_sb; wait_queue_head_t i_wait; @@ -622,7 +621,7 @@ struct inode_operations { int (*smap) (struct inode *,int); int (*updatepage) (struct file *, struct page *, unsigned long, unsigned int); int (*revalidate) (struct dentry *); - int (*flushpage) (struct inode *, struct page *, int); + int (*flushpage) (struct inode *, struct page *, unsigned long); }; struct super_operations { @@ -750,15 +749,7 @@ extern struct file *inuse_filps; extern void set_writetime(struct buffer_head *, int); extern int try_to_free_buffers(struct page *); -extern void __refile_buffer(struct buffer_head * buf); -extern inline void refile_buffer(struct buffer_head * buf) -{ - /* - * Subtle, we do not want to refile not hashed buffers ... - */ - if (buf->b_pprev) - __refile_buffer(buf); -} +extern void refile_buffer(struct buffer_head * buf); extern int buffermem; @@ -881,8 +872,10 @@ extern int generic_readpage(struct file *, struct page *); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *); extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *, writepage_t); -extern int generic_block_flushpage(struct inode *, struct page *, int); -extern long block_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf, fs_getblock_t fs_get_block); +extern int generic_block_flushpage(struct inode *, struct page *, unsigned long); +extern int block_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf, fs_getblock_t fs_get_block); +extern int block_write_full_page (struct file *file, struct page *page, fs_getblock_t fs_get_block); + extern struct super_block *get_super(kdev_t); extern void put_super(kdev_t); diff --git a/include/linux/minix_fs.h b/include/linux/minix_fs.h index 4682ee56e49c..0b41889bd0ba 100644 --- a/include/linux/minix_fs.h +++ b/include/linux/minix_fs.h @@ -110,6 +110,7 @@ extern unsigned long minix_count_free_blocks(struct super_block *sb); extern int minix_bmap(struct inode *,int); extern struct buffer_head * minix_getblk(struct inode *, int, int); +extern int minix_getblk_block (struct inode *, long, int, int *, int *); extern struct buffer_head * minix_bread(struct inode *, int, int); extern void minix_truncate(struct inode *); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 613eb68722bb..d91a0b6410f4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -192,7 +192,8 @@ extern struct inode_operations nfs_file_inode_operations; */ extern struct inode_operations nfs_dir_inode_operations; extern struct dentry_operations nfs_dentry_operations; -extern void nfs_invalidate_dircache(struct inode *); +extern void nfs_flush_dircache(struct inode *); +extern void nfs_free_dircache(struct inode *); /* * linux/fs/nfs/symlink.c diff --git a/include/linux/sysv_fs.h b/include/linux/sysv_fs.h index 49d9d24f163d..d8c6eef5cb3a 100644 --- a/include/linux/sysv_fs.h +++ b/include/linux/sysv_fs.h @@ -387,6 +387,7 @@ extern unsigned long sysv_count_free_blocks(struct super_block *sb); extern int sysv_bmap(struct inode *,int); extern struct buffer_head * sysv_getblk(struct inode *, unsigned int, int); +extern int sysv_getblk_block(struct inode *, long, int, int *, int *); extern struct buffer_head * sysv_file_bread(struct inode *, int, int); extern ssize_t sysv_file_read(struct file *, char *, size_t, loff_t *); diff --git a/include/linux/ufs_fs.h b/include/linux/ufs_fs.h index e36ceba9e9ea..2124c7e57b96 100644 --- a/include/linux/ufs_fs.h +++ b/include/linux/ufs_fs.h @@ -537,6 +537,7 @@ extern int ufs_sync_inode (struct inode *); extern void ufs_write_inode (struct inode *); extern void ufs_delete_inode (struct inode *); extern struct buffer_head * ufs_getfrag (struct inode *, unsigned, int, int *); +extern int ufs_getfrag_block (struct inode *, long, int, int *, int *); extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *); /* namei.c */ diff --git a/mm/filemap.c b/mm/filemap.c index 2ad26debb476..b1ce3a46d7bf 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -137,8 +137,6 @@ repeat: page_cache_release(page); goto repeat; } - if (page_count(page) != 2) - printk("hm, busy page truncated? (not necesserily a bug)\n"); spin_unlock(&pagecache_lock); if (inode->i_op->flushpage) @@ -160,9 +158,6 @@ repeat: page->prev = NULL; remove_page_from_hash_queue(page); page->inode = NULL; - - if (page_count(page) != 2) - printk("hm, busy page truncated? (not necesserily a bug)\n"); spin_unlock(&pagecache_lock); UnlockPage(page); @@ -189,6 +184,13 @@ repeat: /* partial truncate, clear end of page */ if (offset < PAGE_CACHE_SIZE) { unsigned long address; + get_page(page); + if (TryLockPage(page)) { + spin_unlock(&pagecache_lock); + wait_on_page(page); + page_cache_release(page); + goto repeat; + } /* * It's worth dropping the write lock only at * this point. We are holding the page lock @@ -200,10 +202,15 @@ repeat: address = page_address(page); memset((void *) (offset + address), 0, PAGE_CACHE_SIZE - offset); flush_page_to_ram(address); + + if (inode->i_op->flushpage) + inode->i_op->flushpage(inode, page, offset); /* - * we have dropped the lock so we have to + * we have dropped the spinlock so we have to * restart. */ + UnlockPage(page); + page_cache_release(page); goto repeat; } } @@ -217,25 +224,9 @@ repeat: */ void remove_inode_page(struct page *page) { - struct inode *inode = page->inode; - if (!PageLocked(page)) PAGE_BUG(page); - /* - * We might sleep here. Other processes might arrive and sleep on - * the lock, but nobody is allowed to 'cross' the lock and get a - * reference to the page. We then remove the page from the hash - * before unlocking it. This mechanizm ensures that 1) nobody gets - * a half-freed page 2) nobody creates the same pagecache content - * before we finish destroying this page. This is not a - * performance problem as pages here are candidates for getting - * freed, ie. it's supposed to be unlikely that the above situation - * happens. - */ - if (inode->i_op->flushpage) - inode->i_op->flushpage(inode, page, 1); - spin_lock(&pagecache_lock); remove_page_from_inode_queue(page); remove_page_from_hash_queue(page); @@ -274,12 +265,27 @@ int shrink_mmap(int priority, int gfp_mask) referenced = test_and_clear_bit(PG_referenced, &page->flags); - if (PageLocked(page)) + if ((gfp_mask & __GFP_DMA) && !PageDMA(page)) continue; - if ((gfp_mask & __GFP_DMA) && !PageDMA(page)) + if (PageLocked(page)) continue; + /* Is it a buffer page? */ + if (page->buffers) { + if (buffer_under_min()) + continue; + + if (TryLockPage(page)) + continue; + err = try_to_free_buffers(page); + UnlockPage(page); + + if (!err) + continue; + goto out; + } + /* We can't free pages unless there's just one user */ if (page_count(page) != 1) continue; @@ -309,13 +315,14 @@ int shrink_mmap(int priority, int gfp_mask) goto unlock_continue; if (TryLockPage(page)) goto unlock_continue; - if (page_count(page) != 1) { - UnlockPage(page); - goto unlock_continue; + + if (page_count(page) == 1) { + remove_page_from_inode_queue(page); + remove_page_from_hash_queue(page); + page->inode = NULL; } spin_unlock(&pagecache_lock); - remove_inode_page(page); UnlockPage(page); page_cache_release(page); err = 1; @@ -325,17 +332,6 @@ unlock_continue: continue; } spin_unlock(&pagecache_lock); - - /* Is it a buffer page? */ - if (page->buffers) { - if (buffer_under_min()) - continue; - if (!try_to_free_buffers(page)) - continue; - err = 1; - goto out; - } - } while (count > 0); err = 0; out: @@ -1086,17 +1082,14 @@ static int file_send_actor(read_descriptor_t * desc, const char *area, unsigned ssize_t written; unsigned long count = desc->count; struct file *file = (struct file *) desc->buf; - struct inode *inode = file->f_dentry->d_inode; mm_segment_t old_fs; if (size > count) size = count; - down(&inode->i_sem); old_fs = get_fs(); set_fs(KERNEL_DS); written = file->f_op->write(file, area, size, &file->f_pos); set_fs(old_fs); - up(&inode->i_sem); if (written < 0) { desc->error = written; written = 0; @@ -1362,7 +1355,6 @@ static inline int do_write_page(struct inode * inode, struct file * file, int retval; unsigned long size; loff_t loff = offset; - mm_segment_t old_fs; int (*writepage) (struct file *, struct page *); struct page * page; @@ -1376,8 +1368,6 @@ static inline int do_write_page(struct inode * inode, struct file * file, return -EIO; } size -= offset; - old_fs = get_fs(); - set_fs(KERNEL_DS); retval = -EIO; writepage = inode->i_op->writepage; page = mem_map + MAP_NR(page_addr); @@ -1386,11 +1376,13 @@ static inline int do_write_page(struct inode * inode, struct file * file, if (writepage) { retval = writepage(file, page); } else { + mm_segment_t old_fs = get_fs(); + set_fs(KERNEL_DS); if (size == file->f_op->write(file, page_addr, size, &loff)) - retval = 0; + retval = 0; + set_fs(old_fs); } UnlockPage(page); - set_fs(old_fs); return retval; } @@ -1426,9 +1418,7 @@ static int filemap_write_page(struct vm_area_struct * vma, return 0; } - down(&inode->i_sem); result = do_write_page(inode, file, (const char *) page, offset); - up(&inode->i_sem); fput(file); return result; } @@ -1642,10 +1632,7 @@ static int msync_interval(struct vm_area_struct * vma, struct file * file = vma->vm_file; if (file) { struct dentry * dentry = file->f_dentry; - struct inode * inode = dentry->d_inode; - down(&inode->i_sem); error = file_fsync(file, dentry); - up(&inode->i_sem); } } return error; @@ -1972,10 +1959,8 @@ int kpiod(void * unused) dentry = p->file->f_dentry; inode = dentry->d_inode; - down(&inode->i_sem); do_write_page(inode, p->file, (const char *) p->page, p->offset); - up(&inode->i_sem); fput(p->file); page_cache_free(p->page); kmem_cache_free(pio_request_cache, p); -- 2.39.5