git.neil.brown.name Git - history.git/commitdiff
[PATCH] Make nfs_find_request() scale
author Trond Myklebust <trond.myklebust@fys.uio.no>
Fri, 8 Nov 2002 05:47:55 +0000 (21:47 -0800)
committer Trond Myklebust <trond.myklebust@fys.uio.no>
Fri, 8 Nov 2002 05:47:55 +0000 (21:47 -0800)
nfs_find_request() needs to be called every time we schedule a write
on the page cache. Currently it is implemented as a linked list which
needs to be traversed completely in the case where we don't already
have a pending write request on the page in question.

The following patch adopts the new radix tree, as is already used in
the page cache. Performance change is more or less negligible with
the current hard limit of 256 outstanding write requests per mount.

However, when I remove this limit, the old nfs_find_request()
actually results in a 50% reduction in speed on my benchmark test
(iozone with 4 threads each writing a 512MB file on a 512MB Linux
client against a Solaris server on 100Mbit switched net). With this
patch, the result for the same benchmark is a 50% increase in speed.

fs/nfs/inode.c
fs/nfs/pagelist.c
fs/nfs/write.c
include/linux/nfs_fs.h
include/linux/nfs_page.h

index b35e333f26d6690078daccdb7b4018457fee4eff..3e79bf793b483fcd60aca811a65404752c079c65 100644 (file)
@@ -1559,7 +1559,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
                inode_init_once(&nfsi->vfs_inode);
                INIT_LIST_HEAD(&nfsi->dirty);
                INIT_LIST_HEAD(&nfsi->commit);
-               INIT_LIST_HEAD(&nfsi->writeback);
+               INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
                nfsi->ndirty = 0;
                nfsi->ncommit = 0;
                nfsi->npages = 0;
index 1650ddc943e893927e8351dbd269e4264c2c7d12..ea105a94fdbb29814f53e8fe6d9fe731eea061e8 100644 (file)
@@ -36,7 +36,6 @@ nfs_page_alloc(void)
        p = kmem_cache_alloc(nfs_page_cachep, SLAB_NOFS);
        if (p) {
                memset(p, 0, sizeof(*p));
-               INIT_LIST_HEAD(&p->wb_hash);
                INIT_LIST_HEAD(&p->wb_list);
                INIT_LIST_HEAD(&p->wb_lru);
                init_waitqueue_head(&p->wb_wait);
@@ -161,14 +160,9 @@ nfs_release_request(struct nfs_page *req)
        spin_unlock(&nfs_wreq_lock);
 
 #ifdef NFS_PARANOIA
-       if (!list_empty(&req->wb_list))
-               BUG();
-       if (!list_empty(&req->wb_hash))
-               BUG();
-       if (NFS_WBACK_BUSY(req))
-               BUG();
-       if (atomic_read(&NFS_REQUESTLIST(req->wb_inode)->nr_requests) < 0)
-               BUG();
+       BUG_ON (!list_empty(&req->wb_list));
+       BUG_ON (NFS_WBACK_BUSY(req));
+       BUG_ON (atomic_read(&NFS_REQUESTLIST(req->wb_inode)->nr_requests) < 0);
 #endif
 
        /* Release struct file or cached credential */
index 2576cef6a88230111c6b82a9432f45adad0f295a..a71bd57010a487cc7b7df90202a233c34b9a1f11 100644 (file)
@@ -301,19 +301,21 @@ out:
 /*
  * Insert a write request into an inode
  */
-static inline void
+static inline int
 nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
-       if (!list_empty(&req->wb_hash))
-               return;
-       if (!NFS_WBACK_BUSY(req))
-               printk(KERN_ERR "NFS: unlocked request attempted hashed!\n");
-       if (list_empty(&nfsi->writeback))
+       int error;
+
+       error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
+       BUG_ON(error == -EEXIST);
+       if (error)
+               return error;
+       if (!nfsi->npages)
                igrab(inode);
        nfsi->npages++;
-       list_add(&req->wb_hash, &nfsi->writeback);
        req->wb_count++;
+       return 0;
 }
 
 /*
@@ -324,21 +326,14 @@ nfs_inode_remove_request(struct nfs_page *req)
 {
        struct nfs_inode *nfsi;
        struct inode *inode;
+
+       BUG_ON (!NFS_WBACK_BUSY(req));
        spin_lock(&nfs_wreq_lock);
-       if (list_empty(&req->wb_hash)) {
-               spin_unlock(&nfs_wreq_lock);
-               return;
-       }
-       if (!NFS_WBACK_BUSY(req))
-               printk(KERN_ERR "NFS: unlocked request attempted unhashed!\n");
        inode = req->wb_inode;
-       list_del(&req->wb_hash);
-       INIT_LIST_HEAD(&req->wb_hash);
        nfsi = NFS_I(inode);
+       radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
        nfsi->npages--;
-       if ((nfsi->npages == 0) != list_empty(&nfsi->writeback))
-               printk(KERN_ERR "NFS: desynchronized value of nfs_i.npages.\n");
-       if (list_empty(&nfsi->writeback)) {
+       if (!nfsi->npages) {
                spin_unlock(&nfs_wreq_lock);
                iput(inode);
        } else
@@ -354,19 +349,12 @@ static inline struct nfs_page *
 _nfs_find_request(struct inode *inode, unsigned long index)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
-       struct list_head        *head, *next;
-
-       head = &nfsi->writeback;
-       next = head->next;
-       while (next != head) {
-               struct nfs_page *req = nfs_inode_wb_entry(next);
-               next = next->next;
-               if (req->wb_index != index)
-                       continue;
+       struct nfs_page *req;
+
+       req = (struct nfs_page*)radix_tree_lookup(&nfsi->nfs_page_tree, index);
+       if (req)
                req->wb_count++;
-               return req;
-       }
-       return NULL;
+       return req;
 }
 
 static struct nfs_page *
@@ -437,8 +425,8 @@ static int
 nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long idx_start, unsigned int npages)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
-       struct list_head        *p, *head;
-       unsigned long           idx_end;
+       struct nfs_page *req;
+       unsigned long           idx_end, next;
        unsigned int            res = 0;
        int                     error;
 
@@ -448,21 +436,17 @@ nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long idx_s
                idx_end = idx_start + npages - 1;
 
        spin_lock(&nfs_wreq_lock);
-       head = &nfsi->writeback;
-       p = head->next;
-       while (p != head) {
-               struct nfs_page *req = nfs_inode_wb_entry(p);
-
-               p = p->next;
+       next = idx_start;
+       while (radix_tree_gang_lookup(&nfsi->nfs_page_tree, (void **)&req, next, 1)) {
+               if (req->wb_index > idx_end)
+                       break;
 
+               next = req->wb_index + 1;
                if (file && req->wb_file != file)
                        continue;
-
-               if (req->wb_index < idx_start || req->wb_index > idx_end)
-                       continue;
-
                if (!NFS_WBACK_BUSY(req))
                        continue;
+
                req->wb_count++;
                spin_unlock(&nfs_wreq_lock);
                error = nfs_wait_on_request(req);
@@ -470,7 +454,7 @@ nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long idx_s
                if (error < 0)
                        return error;
                spin_lock(&nfs_wreq_lock);
-               p = head->next;
+               next = idx_start;
                res++;
        }
        spin_unlock(&nfs_wreq_lock);
@@ -664,8 +648,14 @@ nfs_update_request(struct file* file, struct inode *inode, struct page *page,
                }
 
                if (new) {
+                       int error;
                        nfs_lock_request_dontget(new);
-                       nfs_inode_add_request(inode, new);
+                       error = nfs_inode_add_request(inode, new);
+                       if (error) {
+                               spin_unlock(&nfs_wreq_lock);
+                               nfs_unlock_request(new);
+                               return ERR_PTR(error);
+                       }
                        spin_unlock(&nfs_wreq_lock);
                        nfs_mark_request_dirty(new);
                        return new;
index 8a25d03a14ca110c0cf270c3a4dd141161c126d8..e0015150f67d5a1a977d992be5833da62a1e6e8e 100644 (file)
@@ -165,7 +165,7 @@ struct nfs_inode {
         */
        struct list_head        dirty;
        struct list_head        commit;
-       struct list_head        writeback;
+       struct radix_tree_root  nfs_page_tree;
 
        unsigned int            ndirty,
                                ncommit,
@@ -356,7 +356,7 @@ nfs_commit_file(struct inode *inode, struct file *file, unsigned long offset,
 static inline int
 nfs_have_writebacks(struct inode *inode)
 {
-       return !list_empty(&NFS_I(inode)->writeback);
+       return NFS_I(inode)->npages != 0;
 }
 
 static inline int
index 029818ff08703b7248a510911c83863616fc0eb8..262bbc5d4c9b7987b8f6c988e266e1d10359c256 100644 (file)
@@ -23,8 +23,7 @@
 #define PG_BUSY                        0
 
 struct nfs_page {
-       struct list_head        wb_hash,        /* Inode */
-                               wb_lru,         /* superblock lru list */
+       struct list_head        wb_lru,         /* superblock lru list */
                                wb_list,        /* Defines state of page: */
                                *wb_list_head;  /*      read/write/commit */
        struct file             *wb_file;
@@ -125,12 +124,6 @@ nfs_list_entry(struct list_head *head)
        return list_entry(head, struct nfs_page, wb_list);
 }
 
-static inline struct nfs_page *
-nfs_inode_wb_entry(struct list_head *head)
-{
-       return list_entry(head, struct nfs_page, wb_hash);
-}
-
 static inline void
 __nfs_add_lru(struct list_head *head, struct nfs_page *req)
 {