From d9c0ffee4db781aedb2f0a652bdfd9ba346d2624 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 23 Nov 2007 15:17:13 -0500 Subject: [PATCH] Import 2.1.128 --- arch/alpha/kernel/osf_sys.c | 6 +- drivers/block/ll_rw_blk.c | 14 +- fs/exec.c | 15 +- fs/isofs/dir.c | 2 +- fs/minix/fsync.c | 4 +- fs/minix/inode.c | 16 +- fs/minix/truncate.c | 2 +- fs/msdos/namei.c | 2 +- fs/nfs/read.c | 11 + fs/nfs/write.c | 476 ++++++++++++------------------------ fs/qnx4/file.c | 6 +- fs/qnx4/symlinks.c | 6 +- fs/readdir.c | 3 - fs/smbfs/file.c | 29 +-- fs/ufs/dir.c | 2 +- fs/vfat/namei.c | 2 +- include/linux/nfs_fs.h | 14 +- include/linux/sched.h | 3 - init/main.c | 8 +- kernel/acct.c | 7 +- kernel/capability.c | 4 +- kernel/exec_domain.c | 3 - kernel/exit.c | 4 - kernel/fork.c | 4 - kernel/info.c | 2 - kernel/itimer.c | 3 - kernel/kmod.c | 2 - kernel/ksyms.c | 16 +- kernel/module.c | 4 +- kernel/panic.c | 2 - kernel/printk.c | 4 - kernel/resource.c | 2 - kernel/sched.c | 7 - kernel/signal.c | 6 +- kernel/softirq.c | 6 +- kernel/sys.c | 7 - kernel/sysctl.c | 2 - kernel/time.c | 3 - mm/filemap.c | 25 +- 39 files changed, 224 insertions(+), 510 deletions(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 00e02d8a1633..4c540f6968a8 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1136,7 +1136,7 @@ osf_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, unsigned long timeout; int ret; - timeout = ~0UL; + timeout = MAX_SCHEDULE_TIMEOUT; if (tvp) { time_t sec, usec; @@ -1147,8 +1147,6 @@ osf_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, timeout = (usec + 1000000/HZ - 1) / (1000000/HZ); timeout += sec * HZ; - if (timeout) - timeout += jiffies + 1; } ret = -ENOMEM; @@ -1168,7 +1166,7 @@ osf_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, zero_fd_set(n, fds->res_out); zero_fd_set(n, fds->res_ex); - ret = do_select(n, fds, timeout); + ret = do_select(n, fds, &timeout); /* OSF does not copy back the remaining time. */ diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index b86019ecf8de..3d26017566d4 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -331,10 +331,16 @@ void add_request(struct blk_dev_struct * dev, struct request * req) goto out; } for ( ; tmp->next ; tmp = tmp->next) { - if ((IN_ORDER(tmp,req) || - !IN_ORDER(tmp,tmp->next)) && - IN_ORDER(req,tmp->next)) - break; + const int after_current = IN_ORDER(tmp,req); + const int before_next = IN_ORDER(req,tmp->next); + + if (!IN_ORDER(tmp,tmp->next)) { + if (after_current || before_next) + break; + } else { + if (after_current && before_next) + break; + } } req->next = tmp->next; tmp->next = req; diff --git a/fs/exec.c b/fs/exec.c index d0d2da81c26d..2a35ed7dad25 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -445,9 +445,9 @@ fail_nomem: /* * This function makes sure the current process has its own signal table, - * so that flush_old_signals can later reset the signals without disturbing - * other processes. (Other processes might share the signal table via - * the CLONE_SIGHAND option to clone().) + * so that flush_signal_handlers can later reset the handlers without + * disturbing other processes. (Other processes might share the signal + * table via the CLONE_SIGHAND option to clone().) */ static inline int make_private_signals(void) @@ -485,13 +485,6 @@ static inline void release_old_signals(struct signal_struct * oldsig) * These functions flushes out all traces of the currently running executable * so that a new one can be started */ -static inline void flush_old_signals(struct task_struct *t) -{ -#if 0 - flush_signals(t); -#endif - flush_signal_handlers(t); -} static inline void flush_old_files(struct files_struct * files) { @@ -554,7 +547,7 @@ int flush_old_exec(struct linux_binprm * bprm) permission(bprm->dentry->d_inode,MAY_READ)) current->dumpable = 0; - flush_old_signals(current); + flush_signal_handlers(current); flush_old_files(current->files); return 0; diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index f73ec5271fd2..a57f9680c637 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -59,7 +59,7 @@ struct inode_operations isofs_dir_inode_operations = NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ - isofs_bmap, /* bmap */ + NULL, /* bmap */ NULL, /* truncate */ NULL /* permission */ }; diff --git a/fs/minix/fsync.c b/fs/minix/fsync.c index 44606d2600ae..ef3d15db9732 100644 --- a/fs/minix/fsync.c +++ b/fs/minix/fsync.c @@ -144,7 +144,7 @@ static int V1_sync_dindirect(struct inode *inode, unsigned short *diblock, return err; } -int V1_minix_sync_file(struct inode * inode, struct file * file) +static int V1_minix_sync_file(struct inode * inode, struct file * file) { int wait, err = 0; @@ -305,7 +305,7 @@ static int V2_sync_tindirect(struct inode *inode, unsigned long *tiblock, return err; } -int V2_minix_sync_file(struct inode * inode, struct file * file) +static int V2_minix_sync_file(struct inode * inode, struct file * file) { int wait, err = 0; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 46ae5c11c024..98336c98b09a 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -40,7 +40,7 @@ static void minix_commit_super (struct super_block * sb, sb->s_dirt = 0; } -void minix_write_super (struct super_block * sb) +static void minix_write_super (struct super_block * sb) { struct minix_super_block * ms; @@ -55,7 +55,7 @@ void minix_write_super (struct super_block * sb) } -void minix_put_super(struct super_block *sb) +static void minix_put_super(struct super_block *sb) { int i; @@ -86,7 +86,7 @@ static struct super_operations minix_sops = { minix_remount }; -int minix_remount (struct super_block * sb, int * flags, char * data) +static int minix_remount (struct super_block * sb, int * flags, char * data) { struct minix_super_block * ms; @@ -162,7 +162,7 @@ static const char * minix_checkroot(struct super_block *s, struct inode *dir) return errmsg; } -struct super_block *minix_read_super(struct super_block *s, void *data, +static struct super_block *minix_read_super(struct super_block *s, void *data, int silent) { struct buffer_head *bh; @@ -326,7 +326,7 @@ out_bad_sb: return NULL; } -int minix_statfs(struct super_block *sb, struct statfs *buf, int bufsiz) +static int minix_statfs(struct super_block *sb, struct statfs *buf, int bufsiz) { struct statfs tmp; @@ -830,7 +830,7 @@ static void V2_minix_read_inode(struct inode * inode) /* * The global function to read an inode. */ -void minix_read_inode(struct inode * inode) +static void minix_read_inode(struct inode * inode) { if (INODE_VERSION(inode) == MINIX_V1) V1_minix_read_inode(inode); @@ -916,7 +916,7 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode) return bh; } -struct buffer_head *minix_update_inode(struct inode *inode) +static struct buffer_head *minix_update_inode(struct inode *inode) { if (INODE_VERSION(inode) == MINIX_V1) return V1_minix_update_inode(inode); @@ -924,7 +924,7 @@ struct buffer_head *minix_update_inode(struct inode *inode) return V2_minix_update_inode(inode); } -void minix_write_inode(struct inode * inode) +static void minix_write_inode(struct inode * inode) { struct buffer_head *bh; diff --git a/fs/minix/truncate.c b/fs/minix/truncate.c index 0f0afa604429..a94806fdf2f5 100644 --- a/fs/minix/truncate.c +++ b/fs/minix/truncate.c @@ -176,7 +176,7 @@ repeat: return retry; } -void V1_minix_truncate(struct inode * inode) +static void V1_minix_truncate(struct inode * inode) { int retry; diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index bd183c9e8501..0e03e79ee382 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -993,7 +993,7 @@ struct inode_operations msdos_dir_inode_operations = { NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ - fat_bmap, /* bmap */ + NULL, /* bmap */ NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 6f1fdd7ffae3..58b8942327fb 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -230,6 +230,17 @@ nfs_readpage(struct file *file, struct page *page) dprintk("NFS: nfs_readpage (%p %ld@%ld)\n", page, PAGE_SIZE, page->offset); set_bit(PG_locked, &page->flags); + + /* + * Try to flush any pending writes to the file.. + * + * NOTE! Because we own the page lock, there cannot + * be any new pending writes generated at this point. + */ + error = nfs_flush_pages(inode, 0, 0, 0); + if (error) + return error; + atomic_inc(&page->count); if (!IS_SWAPFILE(inode) && !PageError(page) && NFS_SERVER(inode)->rsize >= PAGE_SIZE) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f1e803bd29e3..d1ce5af5eadc 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -59,7 +59,7 @@ #define NFS_PARANOIA 1 #define NFSDBG_FACILITY NFSDBG_PAGECACHE -static void nfs_wback_lock(struct rpc_task *task); +static void nfs_wback_begin(struct rpc_task *task); static void nfs_wback_result(struct rpc_task *task); static void nfs_cancel_request(struct nfs_wreq *req); @@ -73,55 +73,13 @@ static void nfs_cancel_request(struct nfs_wreq *req); * Limit number of delayed writes */ static int nr_write_requests = 0; -static int nr_failed_requests = 0; static struct rpc_wait_queue write_queue = RPC_INIT_WAITQ("write_chain"); -struct nfs_wreq * nfs_failed_requests = NULL; /* Hack for future NFS swap support */ #ifndef IS_SWAPFILE # define IS_SWAPFILE(inode) (0) #endif -/* - * Unlock a page after writing it - */ -static inline void -nfs_unlock_page(struct page *page) -{ - dprintk("NFS: unlock %ld\n", page->offset); - clear_bit(PG_locked, &page->flags); - wake_up(&page->wait); - -#ifdef CONFIG_NFS_SWAP - /* async swap-out support */ - if (test_and_clear_bit(PG_decr_after, &page->flags)) - atomic_dec(&page->count); - if (test_and_clear_bit(PG_swap_unlock_after, &page->flags)) { - /* - * We're doing a swap, so check that this page is - * swap-cached and do the necessary cleanup. - */ - swap_after_unlock_page(page->offset); - } -#endif -} - -/* - * Transfer a page lock to a write request waiting for it. - */ -static inline void -transfer_page_lock(struct nfs_wreq *req) -{ - dprintk("NFS: transfer_page_lock\n"); - - req->wb_flags &= ~NFS_WRITE_WANTLOCK; - req->wb_flags |= NFS_WRITE_LOCKED; - rpc_wake_up_task(&req->wb_task); - - dprintk("NFS: wake up task %d (flags %x)\n", - req->wb_task.tk_pid, req->wb_flags); -} - /* * Write a page synchronously. * Offset is the data offset within the page. @@ -195,7 +153,6 @@ io_error: inode->i_ino, fattr.fileid); } - nfs_unlock_page(page); return written? written : result; } @@ -220,11 +177,13 @@ remove_write_request(struct nfs_wreq **q, struct nfs_wreq *wreq) } /* - * Find a write request for a given page + * Find a non-busy write request for a given page to + * try to combine with. */ static inline struct nfs_wreq * find_write_request(struct inode *inode, struct page *page) { + pid_t pid = current->pid; struct nfs_wreq *head, *req; dprintk("NFS: find_write_request(%x/%ld, %p)\n", @@ -232,8 +191,21 @@ find_write_request(struct inode *inode, struct page *page) if (!(req = head = NFS_WRITEBACK(inode))) return NULL; do { - if (req->wb_page == page) + /* + * We can't combine with canceled requests or + * requests that have already been started.. + */ + if (req->wb_flags & (NFS_WRITE_CANCELLED | NFS_WRITE_INPROGRESS)) + continue; + + if (req->wb_page == page && req->wb_pid == pid) return req; + + /* + * Ehh, don't keep too many tasks queued.. + */ + rpc_wake_up_task(&req->wb_task); + } while ((req = WB_NEXT(req)) != head); return NULL; } @@ -259,65 +231,14 @@ nfs_find_dentry_request(struct inode *inode, struct dentry *dentry) return found; } -/* - * Find a failed write request by pid - */ -static struct nfs_wreq * -find_failed_request(struct inode *inode, pid_t pid) -{ - struct nfs_wreq *head, *req; - - req = head = nfs_failed_requests; - while (req != NULL) { - if (req->wb_inode == inode && (pid == 0 || req->wb_pid == pid)) - return req; - if ((req = WB_NEXT(req)) == head) - break; - } - return NULL; -} - -/* - * Add a request to the failed list. - */ -static void -append_failed_request(struct nfs_wreq * req) -{ - static int old_max = 16; - - append_write_request(&nfs_failed_requests, req); - nr_failed_requests++; - if (nr_failed_requests >= old_max) { - printk("NFS: %d failed requests\n", nr_failed_requests); - old_max = old_max << 1; - } -} - -/* - * Remove a request from the failed list and free it. - */ -static void -remove_failed_request(struct nfs_wreq * req) -{ - remove_write_request(&nfs_failed_requests, req); - kfree(req); - nr_failed_requests--; -} - /* * Find and release all failed requests for this inode. */ int nfs_check_failed_request(struct inode * inode) { - struct nfs_wreq * req; - int found = 0; - - while ((req = find_failed_request(inode, 0)) != NULL) { - remove_failed_request(req); - found++; - } - return found; + /* FIXME! */ + return 0; } /* @@ -334,6 +255,10 @@ update_write_request(struct nfs_wreq *req, unsigned int first, dprintk("nfs: trying to update write request %p\n", req); + /* not contiguous? */ + if (rqlast < first || last < rqfirst) + return 0; + /* Check the credentials associated with this write request. * If the buffer is owned by the same user, we can happily * add our data without risking server permission problems. @@ -349,12 +274,25 @@ update_write_request(struct nfs_wreq *req, unsigned int first, rqfirst = first; if (rqlast < last) rqlast = last; + req->wb_offset = rqfirst; req->wb_bytes = rqlast - rqfirst; + req->wb_count++; return 1; } +static inline void +free_write_request(struct nfs_wreq * req) +{ + if (!--req->wb_count) { + struct inode *inode = req->wb_inode; + remove_write_request(&NFS_WRITEBACK(inode), req); + kfree(req); + nr_write_requests--; + } +} + /* * Create and initialize a writeback request */ @@ -371,8 +309,7 @@ create_write_request(struct dentry *dentry, struct inode *inode, page->offset + offset, bytes); /* FIXME: Enforce hard limit on number of concurrent writes? */ - - wreq = (struct nfs_wreq *) kmalloc(sizeof(*wreq), GFP_USER); + wreq = (struct nfs_wreq *) kmalloc(sizeof(*wreq), GFP_KERNEL); if (!wreq) goto out_fail; memset(wreq, 0, sizeof(*wreq)); @@ -380,7 +317,7 @@ create_write_request(struct dentry *dentry, struct inode *inode, task = &wreq->wb_task; rpc_init_task(task, clnt, nfs_wback_result, RPC_TASK_NFSWRITE); task->tk_calldata = wreq; - task->tk_action = nfs_wback_lock; + task->tk_action = nfs_wback_begin; rpcauth_lookupcred(task); /* Obtain user creds */ if (task->tk_status < 0) @@ -393,6 +330,7 @@ create_write_request(struct dentry *dentry, struct inode *inode, wreq->wb_page = page; wreq->wb_offset = offset; wreq->wb_bytes = bytes; + wreq->wb_count = 2; /* One for the IO, one for us */ atomic_inc(&page->count); @@ -414,8 +352,7 @@ out_fail: * Schedule a writeback RPC call. * If the server is congested, don't add to our backlog of queued * requests but call it synchronously. - * The function returns false if the page has been unlocked as the - * consequence of a synchronous write call. + * The function returns whether we should wait for the thing or not. * * FIXME: Here we could walk the inode's lock list to see whether the * page we're currently writing to has been write-locked by the caller. @@ -438,7 +375,6 @@ schedule_write_request(struct nfs_wreq *req, int sync) dprintk("NFS: %4d schedule_write_request (sync)\n", task->tk_pid); /* Page is already locked */ - req->wb_flags |= NFS_WRITE_LOCKED; rpc_clnt_sigmask(clnt, &oldmask); rpc_execute(task); rpc_clnt_sigunmask(clnt, &oldmask); @@ -450,39 +386,38 @@ schedule_write_request(struct nfs_wreq *req, int sync) rpc_sleep_on(&write_queue, task, NULL, NULL); } - return sync == 0; + return sync; } /* - * Wait for request to complete - * This is almost a copy of __wait_on_page + * Wait for request to complete. */ -static inline int +static int wait_on_write_request(struct nfs_wreq *req) { + struct rpc_clnt *clnt = NFS_CLIENT(req->wb_inode); struct wait_queue wait = { current, NULL }; - struct page *page = req->wb_page; - int retval; sigset_t oldmask; - struct rpc_clnt *clnt = NFS_CLIENT(req->wb_inode); + int retval; + + /* Make sure it's started.. */ + if (!WB_INPROGRESS(req)) + rpc_wake_up_task(&req->wb_task); rpc_clnt_sigmask(clnt, &oldmask); - add_wait_queue(&page->wait, &wait); - atomic_inc(&page->count); + add_wait_queue(&req->wb_wait, &wait); for (;;) { current->state = TASK_INTERRUPTIBLE; retval = 0; - if (!PageLocked(page)) + if (req->wb_flags & NFS_WRITE_COMPLETE) break; retval = -ERESTARTSYS; if (signalled()) break; schedule(); } - remove_wait_queue(&page->wait, &wait); + remove_wait_queue(&req->wb_wait, &wait); current->state = TASK_RUNNING; - /* N.B. page may have been unused, so we must use free_page() */ - free_page(page_address(page)); rpc_clnt_sigunmask(clnt, &oldmask); return retval; } @@ -510,7 +445,8 @@ nfs_updatepage(struct file *file, struct page *page, unsigned long offset, unsig struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; struct nfs_wreq *req; - int status = 0, page_locked = 1; + int synchronous = sync; + int retval; dprintk("NFS: nfs_updatepage(%s/%s %d@%ld, sync=%d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, @@ -526,143 +462,54 @@ nfs_updatepage(struct file *file, struct page *page, unsigned long offset, unsig * After returning, generic_file_write will wait on the * page and retry the update. */ - if ((req = find_write_request(inode, page)) != NULL) { - if (update_write_request(req, offset, count)) - goto updated; + req = find_write_request(inode, page); + if (req && update_write_request(req, offset, count)) + goto updated; - dprintk("NFS: wake up conflicting write request.\n"); - transfer_page_lock(req); - return 0; - } - - /* If wsize is smaller than page size, update and write + /* + * If wsize is smaller than page size, update and write * page synchronously. */ if (NFS_SERVER(inode)->wsize < PAGE_SIZE) return nfs_writepage_sync(dentry, inode, page, offset, count); /* Create the write request. */ - status = -ENOBUFS; req = create_write_request(dentry, inode, page, offset, count); if (!req) - goto done; + return -ENOBUFS; + + /* + * Ok, there's another user of this page with the new request.. + * Increment the usage count, and schedule the request (the + * end of the request will drop the usage count..) + */ + atomic_inc(&page->count); /* Schedule request */ - page_locked = schedule_write_request(req, sync); + synchronous = schedule_write_request(req, sync); updated: if (req->wb_bytes == PAGE_SIZE) set_bit(PG_uptodate, &page->flags); - /* - * If we wrote up to the end of the chunk, transmit request now. - * We should be a bit more intelligent about detecting whether a - * process accesses the file sequentially or not. - */ - if (page_locked && (offset + count >= PAGE_SIZE || sync)) - req->wb_flags |= NFS_WRITE_WANTLOCK; - - /* If the page was written synchronously, return any error that - * may have happened; otherwise return the write count. */ - if (page_locked || (status = nfs_write_error(inode)) >= 0) - status = count; - -done: - /* Unlock page and wake up anyone sleeping on it */ - if (page_locked) { - if (req && WB_WANTLOCK(req)) { - transfer_page_lock(req); - /* rpc_execute(&req->wb_task); */ - if (sync) { - /* if signalled, ensure request is cancelled */ - if ((count = wait_on_write_request(req)) != 0) { - nfs_cancel_request(req); - status = count; - } - if ((count = nfs_write_error(inode)) < 0) - status = count; - } + retval = count; + if (synchronous) { + int status = wait_on_write_request(req); + if (status) { + nfs_cancel_request(req); + retval = status; } else { - if (status < 0) { -printk("NFS: %s/%s write failed, clearing bit\n", -dentry->d_parent->d_name.name, dentry->d_name.name); - clear_bit(PG_uptodate, &page->flags); - } - nfs_unlock_page(page); + status = req->wb_status; + if (status < 0) + retval = status; } - } - - dprintk("NFS: nfs_updatepage returns %d (isize %ld)\n", - status, inode->i_size); - return status; -} - -/* - * Flush out a dirty page. - */ -static void -nfs_flush_request(struct nfs_wreq *req) -{ - struct page *page = req->wb_page; - -#ifdef NFS_DEBUG_VERBOSE -if (req->wb_inode != page->inode) -printk("NFS: inode %ld no longer has page %p\n", req->wb_inode->i_ino, page); -#endif - dprintk("NFS: nfs_flush_request(%s/%s, @%ld)\n", - req->wb_dentry->d_parent->d_name.name, - req->wb_dentry->d_name.name, page->offset); - - req->wb_flags |= NFS_WRITE_WANTLOCK; - if (!test_and_set_bit(PG_locked, &page->flags)) { - transfer_page_lock(req); - } else { - printk(KERN_WARNING "NFS oops in %s: can't lock page!\n", - __FUNCTION__); - rpc_wake_up_task(&req->wb_task); - } -} - -/* - * Flush writeback requests. See nfs_flush_dirty_pages for details. - */ -static struct nfs_wreq * -nfs_flush_pages(struct inode *inode, pid_t pid, off_t offset, off_t len, - int invalidate) -{ - struct nfs_wreq *head, *req, *last = NULL; - off_t rqoffset, rqend, end; - - end = len? offset + len : 0x7fffffffUL; - - req = head = NFS_WRITEBACK(inode); - while (req != NULL) { - dprintk("NFS: %4d nfs_flush inspect %s/%s @%ld fl %x\n", - req->wb_task.tk_pid, - req->wb_dentry->d_parent->d_name.name, - req->wb_dentry->d_name.name, - req->wb_page->offset, req->wb_flags); - rqoffset = req->wb_page->offset + req->wb_offset; - rqend = rqoffset + req->wb_bytes; - if (rqoffset < end && offset < rqend && - (pid == 0 || req->wb_pid == pid)) { - if (!WB_INPROGRESS(req) && !WB_HAVELOCK(req)) { -#ifdef NFS_DEBUG_VERBOSE -printk("nfs_flush: flushing inode=%ld, %d @ %lu\n", -req->wb_inode->i_ino, req->wb_bytes, rqoffset); -#endif - nfs_flush_request(req); - } - last = req; - } - if (invalidate) - req->wb_flags |= NFS_WRITE_INVALIDATE; - if ((req = WB_NEXT(req)) == head) - break; + if (retval < 0) + clear_bit(PG_uptodate, &page->flags); } - return last; + free_write_request(req); + return retval; } /* @@ -699,53 +546,67 @@ nfs_cancel_dirty(struct inode *inode, pid_t pid) } /* - * Flush out all dirty pages belonging to a certain user process and - * maybe wait for the RPC calls to complete. + * Try to flush any dirty pages, returning a success marker.. * - * Another purpose of this function is sync()ing a file range before a - * write lock is released. This is what offset and length are for, even if - * this isn't used by the nlm module yet. + * Unlike "nfs_flush_dirty_pages()" this does not invalidate + * the writes if it is interrupted. The caller will instead + * look at the error code and gracefully fail to do what it + * wanted to do. */ int -nfs_flush_dirty_pages(struct inode *inode, pid_t pid, off_t offset, off_t len) +nfs_flush_pages(struct inode *inode, pid_t pid, off_t offset, off_t len) { - struct nfs_wreq *last = NULL; - int result = 0; + int retval; - dprintk("NFS: flush_dirty_pages(%x/%ld for pid %d %ld/%ld)\n", - inode->i_dev, inode->i_ino, current->pid, offset, len); + do { + struct nfs_wreq *req = NFS_WRITEBACK(inode); + struct nfs_wreq *head = req; - for (;;) { - /* Flush all pending writes for the pid and file region */ - last = nfs_flush_pages(inode, pid, offset, len, 0); - if (last == NULL) - break; - result = wait_on_write_request(last); - if (result) { - nfs_cancel_dirty(inode,pid); - break; + if (!req) + return 0; + + /* + * Iterate over all outstanding write requests, + * looking for any that are ours.. + */ + for (;;) { + if (!(req->wb_flags & NFS_WRITE_COMPLETE)) { + if (!pid || req->wb_pid == pid) + break; + } + req = WB_NEXT(req); + if (req == head) + return 0; } - } - return result; -} + req->wb_count++; + retval = wait_on_write_request(req); + free_write_request(req); + } while (!retval); + return retval; +} /* - * Flush out any pending write requests and flag that they be discarded - * after the write is complete. + * Flush out all dirty pages belonging to a certain user process and + * maybe wait for the RPC calls to complete. * - * This function is called from nfs_refresh_inode just before it calls - * invalidate_inode_pages. After nfs_flush_pages returns, we can be sure - * that all dirty pages are locked, so that invalidate_inode_pages does - * not throw away any dirty pages. + * Another purpose of this function is sync()ing a file range before a + * write lock is released. This is what offset and length are for, even if + * this isn't used by the nlm module yet. */ +int +nfs_flush_dirty_pages(struct inode *inode, pid_t pid, off_t offset, off_t len) +{ + int retval = nfs_flush_pages(inode, pid, offset, len); + if (retval) + nfs_cancel_dirty(inode,pid); + return retval; +} + void nfs_invalidate_pages(struct inode *inode) { - dprintk("NFS: nfs_invalidate_pages(%x/%ld)\n", - inode->i_dev, inode->i_ino); - - nfs_flush_pages(inode, 0, 0, 0, 1); + nfs_cancel_dirty(inode,0); } /* @@ -783,21 +644,8 @@ nfs_truncate_dirty_pages(struct inode *inode, unsigned long offset) int nfs_check_error(struct inode *inode) { - struct nfs_wreq *req; - int status = 0; - - dprintk("nfs: checking for write error inode %04x/%ld\n", - inode->i_dev, inode->i_ino); - - req = find_failed_request(inode, current->pid); - if (req) { - dprintk("nfs: write error %d inode %04x/%ld\n", - req->wb_task.tk_status, inode->i_dev, inode->i_ino); - - status = req->wb_task.tk_status; - remove_failed_request(req); - } - return status; + /* FIXME! */ + return 0; } /* @@ -807,23 +655,16 @@ nfs_check_error(struct inode *inode) * set up the RPC call info, and pass to the call FSM. */ static void -nfs_wback_lock(struct rpc_task *task) +nfs_wback_begin(struct rpc_task *task) { struct nfs_wreq *req = (struct nfs_wreq *) task->tk_calldata; struct page *page = req->wb_page; struct dentry *dentry = req->wb_dentry; - dprintk("NFS: %4d nfs_wback_lock (%s/%s, status=%d flags=%x)\n", + dprintk("NFS: %4d nfs_wback_begin (%s/%s, status=%d flags=%x)\n", task->tk_pid, dentry->d_parent->d_name.name, dentry->d_name.name, task->tk_status, req->wb_flags); - if (!WB_HAVELOCK(req)) - req->wb_flags |= NFS_WRITE_WANTLOCK; - - if (WB_WANTLOCK(req) && test_and_set_bit(PG_locked, &page->flags)) - goto out_locked; - req->wb_flags &= ~NFS_WRITE_WANTLOCK; - req->wb_flags |= NFS_WRITE_LOCKED; task->tk_status = 0; /* Setup the task struct for a writeback call */ @@ -836,12 +677,6 @@ nfs_wback_lock(struct rpc_task *task) req->wb_flags |= NFS_WRITE_INPROGRESS; return; - -out_locked: - printk("NFS: page already locked in writeback_lock!\n"); - task->tk_timeout = 2 * HZ; - rpc_sleep_on(&write_queue, task, NULL, NULL); - return; } /* @@ -861,15 +696,10 @@ nfs_wback_result(struct rpc_task *task) /* Set the WRITE_COMPLETE flag, but leave WRITE_INPROGRESS set */ req->wb_flags |= NFS_WRITE_COMPLETE; + req->wb_status = status; + if (status < 0) { - /* - * An error occurred. Report the error back to the - * application by adding the request to the failed - * requests list. - */ - if (find_failed_request(inode, req->wb_pid)) - status = 0; - clear_bit(PG_uptodate, &page->flags); + req->wb_flags |= NFS_WRITE_INVALIDATE; } else if (!WB_CANCELLED(req)) { struct nfs_fattr *fattr = &req->wb_fattr; /* Update attributes as result of writeback. @@ -899,31 +729,27 @@ nfs_wback_result(struct rpc_task *task) } } - /* - * This call might block, so we defer removing the request - * from the inode's writeback list. - */ rpc_release_task(task); if (WB_INVALIDATE(req)) clear_bit(PG_uptodate, &page->flags); - if (WB_HAVELOCK(req)) - nfs_unlock_page(page); + __free_page(page); + + wake_up(&req->wb_wait); + /* - * Now it's safe to remove the request from the inode's - * writeback list and wake up any tasks sleeping on it. - * If the request failed, add it to the failed list. + * FIXME! + * + * We should not free the request here if it has pending + * error status on it. We should just leave it around, to + * let the error be collected later. However, the error + * collecting routines are too stupid for that right now, + * so we just drop the error on the floor at this point + * for any async writes. + * + * This should not be a major headache to fix, but I want + * to validate basic operations first. */ - remove_write_request(&NFS_WRITEBACK(inode), req); - - if (status >= 0) - kfree(req); - else { - dprintk("NFS: %4d saving write failure code\n", task->tk_pid); - append_failed_request(req); - } - - free_page(page_address(page)); - nr_write_requests--; + free_write_request(req); } diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c index f26733d9a9ec..49998012677b 100644 --- a/fs/qnx4/file.c +++ b/fs/qnx4/file.c @@ -180,11 +180,7 @@ static struct file_operations qnx4_file_operations = struct inode_operations qnx4_file_inode_operations = { &qnx4_file_operations, /* default file operations */ -#ifdef CONFIG_QNX4FS_RW - qnx4_create, /* create */ -#else - NULL, -#endif + NULL, /* create? It's not a directory */ NULL, /* lookup */ NULL, /* link */ NULL, /* unlink */ diff --git a/fs/qnx4/symlinks.c b/fs/qnx4/symlinks.c index f590d04a6d97..2fb7c748e79f 100644 --- a/fs/qnx4/symlinks.c +++ b/fs/qnx4/symlinks.c @@ -32,11 +32,7 @@ static struct dentry *qnx4_follow_link(struct dentry *, struct dentry *); struct inode_operations qnx4_symlink_inode_operations = { NULL, /* no file-operations */ -#ifdef CONFIG_QNX4FS_RW - qnx4_create, /* create */ -#else - NULL, -#endif + NULL, /* create */ NULL, /* lookup */ NULL, /* link */ NULL, /* unlink */ diff --git a/fs/readdir.c b/fs/readdir.c index f517c9a87b8f..305b1cc74762 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -6,12 +6,9 @@ #include #include -#include #include #include #include -#include -#include #include #include diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index 3c3e87aa6a01..a6cf24ce1714 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -195,40 +195,15 @@ smb_writepage(struct file *file, struct page *page) } static int -smb_updatepage(struct file *file, struct page *page, const char *buffer, - unsigned long offset, unsigned int count, int sync) +smb_updatepage(struct file *file, struct page *page, unsigned long offset, unsigned int count, int sync) { struct dentry *dentry = file->f_dentry; - unsigned long page_addr = page_address(page); - int result; pr_debug("SMBFS: smb_updatepage(%s/%s %d@%ld, sync=%d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, count, page->offset+offset, sync); -#ifdef SMBFS_PARANOIA - if (test_bit(PG_locked, &page->flags)) - printk("smb_updatepage: page already locked!\n"); -#endif - set_bit(PG_locked, &page->flags); - atomic_inc(&page->count); - - if (copy_from_user((char *) page_addr + offset, buffer, count)) - goto bad_fault; - result = smb_writepage_sync(dentry, page, offset, count); -out: - free_page(page_addr); - return result; - -bad_fault: -#ifdef SMBFS_PARANOIA -printk("smb_updatepage: fault at addr=%lu, offset=%lu, buffer=%p\n", -page_addr, offset, buffer); -#endif - result = -EFAULT; - clear_bit(PG_uptodate, &page->flags); - smb_unlock_page(page); - goto out; + return smb_writepage_sync(dentry, page, offset, count); } static ssize_t diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 170ffb10b6be..6fc224512012 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -220,7 +220,7 @@ struct inode_operations ufs_dir_inode_operations = { NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ - ufs_bmap, /* bmap */ + NULL, /* bmap */ NULL, /* truncate */ ufs_permission, /* permission */ NULL, /* smap */ diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index fe1565106806..59622732ca4a 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -1846,7 +1846,7 @@ struct inode_operations vfat_dir_inode_operations = { NULL, /* followlink */ NULL, /* readpage */ NULL, /* writepage */ - fat_bmap, /* bmap */ + NULL, /* bmap */ NULL, /* truncate */ NULL /* permission */ }; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index e8df622ae8f9..c6ea474365eb 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -101,31 +101,29 @@ struct nfs_wreq { struct dentry * wb_dentry; /* dentry referenced */ struct inode * wb_inode; /* inode referenced */ struct page * wb_page; /* page to be written */ + struct wait_queue * wb_wait; /* wait for completion */ unsigned int wb_offset; /* offset within page */ unsigned int wb_bytes; /* dirty range */ + unsigned int wb_count; /* user count */ + int wb_status; pid_t wb_pid; /* owner process */ unsigned short wb_flags; /* status flags */ struct nfs_writeargs wb_args; /* NFS RPC stuff */ struct nfs_fattr wb_fattr; /* file attributes */ }; -#define wb_status wb_task.tk_status #define WB_NEXT(req) ((struct nfs_wreq *) ((req)->wb_list.next)) /* * Various flags for wb_flags */ -#define NFS_WRITE_WANTLOCK 0x0001 /* needs to lock page */ -#define NFS_WRITE_LOCKED 0x0002 /* holds lock on page */ #define NFS_WRITE_CANCELLED 0x0004 /* has been cancelled */ #define NFS_WRITE_UNCOMMITTED 0x0008 /* written but uncommitted (NFSv3) */ #define NFS_WRITE_INVALIDATE 0x0010 /* invalidate after write */ #define NFS_WRITE_INPROGRESS 0x0100 /* RPC call in progress */ #define NFS_WRITE_COMPLETE 0x0200 /* RPC call completed */ -#define WB_WANTLOCK(req) ((req)->wb_flags & NFS_WRITE_WANTLOCK) -#define WB_HAVELOCK(req) ((req)->wb_flags & NFS_WRITE_LOCKED) #define WB_CANCELLED(req) ((req)->wb_flags & NFS_WRITE_CANCELLED) #define WB_UNCOMMITTED(req) ((req)->wb_flags & NFS_WRITE_UNCOMMITTED) #define WB_INVALIDATE(req) ((req)->wb_flags & NFS_WRITE_INVALIDATE) @@ -217,6 +215,7 @@ extern int nfs_writepage(struct file *, struct page *); extern int nfs_find_dentry_request(struct inode *, struct dentry *); extern int nfs_check_failed_request(struct inode *); extern int nfs_check_error(struct inode *); +extern int nfs_flush_pages(struct inode *, pid_t, off_t, off_t); extern int nfs_flush_dirty_pages(struct inode *, pid_t, off_t, off_t); extern int nfs_truncate_dirty_pages(struct inode *, unsigned long); extern void nfs_invalidate_pages(struct inode *); @@ -245,13 +244,10 @@ nfs_revalidate_inode(struct nfs_server *server, struct dentry *dentry) return _nfs_revalidate_inode(server, dentry); } -extern struct nfs_wreq * nfs_failed_requests; static inline int nfs_write_error(struct inode *inode) { - if (nfs_failed_requests == NULL) - return 0; - return nfs_check_error(inode); + return NFS_WRITEBACK(inode) && nfs_check_error(inode); } /* NFS root */ diff --git a/include/linux/sched.h b/include/linux/sched.h index bb9364e2b171..086cb1c95e06 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -20,7 +20,6 @@ extern unsigned long event; #include #include #include -#include #include /* @@ -65,11 +64,9 @@ extern int nr_running, nr_tasks; extern int last_pid; #include -#include #include #include #include -#include #include #include diff --git a/init/main.c b/init/main.c index 4adf92e9d536..f208b17b6729 100644 --- a/init/main.c +++ b/init/main.c @@ -11,16 +11,11 @@ #define __KERNEL_SYSCALLS__ -#include +#include #include #include -#include -#include -#include #include #include -#include -#include #include #include #include @@ -34,7 +29,6 @@ #include #include -#include #include #include diff --git a/kernel/acct.c b/kernel/acct.c index ed0353184a53..1c39c73f2ab7 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -45,18 +45,13 @@ #include #ifdef CONFIG_BSD_PROCESS_ACCT -#include +#include #include -#include #include #include -#include #include -#include -#include #include #include -#include #include #include diff --git a/kernel/capability.c b/kernel/capability.c index 1ba6db19245b..45e24f6e4488 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -6,10 +6,8 @@ */ #include -#include -#include -#include #include +#include #include #include diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index 5d7e2f056c9c..793b42c13dc4 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -1,8 +1,5 @@ -#include -#include #include #include -#include #include #include diff --git a/kernel/exit.c b/kernel/exit.c index dcb23ac974c4..44db626cb6e5 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -7,16 +7,12 @@ #include #include #include -#include #include -#include #include #include -#include #include #include #include -#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index a1ef239aa636..dcb08b761f4f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -14,17 +14,13 @@ #include #include #include -#include #include #include #include -#include #include -#include #include #include -#include #include #include #include diff --git a/kernel/info.c b/kernel/info.c index ffaec7140e78..8a56ce5f2623 100644 --- a/kernel/info.c +++ b/kernel/info.c @@ -9,10 +9,8 @@ #include #include #include -#include #include #include -#include #include #include diff --git a/kernel/itimer.c b/kernel/itimer.c index 192f80954f26..5709598a4d2a 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -6,13 +6,10 @@ /* These are all the functions necessary to implement itimers */ -#include #include #include #include -#include #include -#include #include #include diff --git a/kernel/kmod.c b/kernel/kmod.c index 7a57d778e1f3..8b19e2415db3 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -12,10 +12,8 @@ #define __KERNEL_SYSCALLS__ #include -#include #include #include -#include #include diff --git a/kernel/ksyms.c b/kernel/ksyms.c index 70be945efbbb..31ddd48bd4f7 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -11,32 +11,24 @@ #include #include -#include -#include +#include #include #include -#include #include #include #include #include #include -#include #include #include #include #include -#include -#include -#include #include #include -#include #include #include #include #include -#include #include #include #include @@ -51,9 +43,6 @@ #include #include #include -#include - -#include #if defined(CONFIG_PROC_FS) #include @@ -62,9 +51,6 @@ #include #endif #include -#ifdef __SMP__ -#include -#endif extern char *get_options(char *str, int *ints); extern void set_device_ro(kdev_t dev,int flag); diff --git a/kernel/module.c b/kernel/module.c index e367a747c3db..a71ff3de7385 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1,13 +1,11 @@ #include -#include +#include #include /* defines GFP_KERNEL */ #include #include -#include #include #include #include -#include #include #include diff --git a/kernel/panic.c b/kernel/panic.c index 14bb727fc33f..bba75e3effc0 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -10,10 +10,8 @@ */ #include -#include #include #include -#include #include #include #include diff --git a/kernel/printk.c b/kernel/printk.c index 0d8287fefaee..66c09926c064 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -16,16 +16,12 @@ #include #include -#include #include -#include #include -#include #include #include #include -#include #include #define LOG_BUF_LEN (16384) diff --git a/kernel/resource.c b/kernel/resource.c index 2d6b56eb0fd4..fa607edf8a71 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -8,9 +8,7 @@ */ #include -#include #include -#include #include #include diff --git a/kernel/sched.c b/kernel/sched.c index 08bc754ae351..5b27103b412c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -16,25 +16,18 @@ * current-task */ -#include #include -#include -#include #include #include #include -#include -#include #include #include #include #include #include -#include #include #include -#include #include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index e519bcb22c21..ebfb88ad2771 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -8,14 +8,10 @@ #include #include -#include -#include #include #include -#include #include #include -#include #include #include #include @@ -333,10 +329,10 @@ printk("SIG queue (%s:%d): %d ", t->comm, t->pid, sig); if (nr_queued_signals < max_queued_signals) { q = (struct signal_queue *) kmem_cache_alloc(signal_queue_cachep, GFP_KERNEL); - nr_queued_signals++; } if (q) { + nr_queued_signals++; q->next = NULL; *t->sigqueue_tail = q; t->sigqueue_tail = &q->next; diff --git a/kernel/softirq.c b/kernel/softirq.c index 4bc5ee4b6f55..42bdd17523de 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -8,17 +8,13 @@ * bottom_half handler need not be re-entrant. */ -#include #include -#include -#include #include +#include #include #include -#include #include -#include #include #include #include diff --git a/kernel/sys.c b/kernel/sys.c index 062f9f5db864..0ef636bfb33c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -6,20 +6,13 @@ #include #include -#include -#include #include -#include #include -#include #include -#include #include #include #include #include -#include -#include #include #include #include diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f439d77db387..69b81fe508db 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -22,10 +22,8 @@ #include #include #include -#include #include #include -#include #include #include diff --git a/kernel/time.c b/kernel/time.c index 0f1094655828..884fa1519496 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -20,12 +20,9 @@ #include #include -#include -#include #include #include #include -#include #include #include diff --git a/mm/filemap.c b/mm/filemap.c index 1691cc68c71a..df82a952418d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1567,7 +1567,8 @@ generic_file_write(struct file *file, const char *buf, bytes = count; hash = page_hash(inode, pgpos); - if (!(page = __find_page(inode, pgpos, *hash))) { + page = __find_page(inode, pgpos, *hash); + if (!page) { if (!page_cache) { page_cache = __get_free_page(GFP_USER); if (page_cache) @@ -1580,21 +1581,25 @@ generic_file_write(struct file *file, const char *buf, page_cache = 0; } + /* Get exclusive IO access to the page.. */ wait_on_page(page); set_bit(PG_locked, &page->flags); + /* + * Do the real work.. If the writer ends up delaying the write, + * the writer needs to increment the page use counts until he + * is done with the page. + */ bytes -= copy_from_user((u8*)page_address(page) + offset, buf, bytes); - if (!bytes) { - status = -EFAULT; - clear_bit(PG_locked, &page->flags); - wake_up(&page->wait); - __free_page(page); - break; - } - - status = inode->i_op->updatepage(file, page, offset, bytes, sync); + status = -EFAULT; + if (bytes) + status = inode->i_op->updatepage(file, page, offset, bytes, sync); + /* Mark it unlocked again and drop the page.. */ + clear_bit(PG_locked, &page->flags); + wake_up(&page->wait); __free_page(page); + if (status < 0) break; -- 2.39.5